From owner-freebsd-ports-bugs@FreeBSD.ORG Mon May 29 14:30:19 2006 Return-Path: X-Original-To: freebsd-ports-bugs@hub.freebsd.org Delivered-To: freebsd-ports-bugs@hub.freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 2BB9E16A549 for ; Mon, 29 May 2006 14:30:19 +0000 (UTC) (envelope-from gnats@FreeBSD.org) Received: from freefall.freebsd.org (freefall.freebsd.org [216.136.204.21]) by mx1.FreeBSD.org (Postfix) with ESMTP id 81BD843D53 for ; Mon, 29 May 2006 14:30:18 +0000 (GMT) (envelope-from gnats@FreeBSD.org) Received: from freefall.freebsd.org (gnats@localhost [127.0.0.1]) by freefall.freebsd.org (8.13.4/8.13.4) with ESMTP id k4TEUI05045247 for ; Mon, 29 May 2006 14:30:18 GMT (envelope-from gnats@freefall.freebsd.org) Received: (from gnats@localhost) by freefall.freebsd.org (8.13.4/8.13.4/Submit) id k4TEUI4D045246; Mon, 29 May 2006 14:30:18 GMT (envelope-from gnats) Resent-Date: Mon, 29 May 2006 14:30:18 GMT Resent-Message-Id: <200605291430.k4TEUI4D045246@freefall.freebsd.org> Resent-From: FreeBSD-gnats-submit@FreeBSD.org (GNATS Filer) Resent-To: freebsd-ports-bugs@FreeBSD.org Resent-Reply-To: FreeBSD-gnats-submit@FreeBSD.org, "Khairil Yusof" Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 4833716B04F for ; Mon, 29 May 2006 14:23:31 +0000 (UTC) (envelope-from kaeru@inigo-tech.com) Received: from mail.inigo-tech.com (gambit.inigo-tech.com [202.190.199.49]) by mx1.FreeBSD.org (Postfix) with ESMTP id 9255143D46 for ; Mon, 29 May 2006 14:23:30 +0000 (GMT) (envelope-from kaeru@inigo-tech.com) Received: from wolverine.inigo-tech.com (unknown [218.208.205.1]) by mail.inigo-tech.com (Postfix) with ESMTP id 57E3F250439 for ; Mon, 29 May 2006 22:23:23 +0800 (MYT) Message-Id: <1148916258.71561@wolverine.inigo-tech.com> Date: Mon, 29 May 2006 22:24:18 +0700 From: "Khairil Yusof" To: "FreeBSD gnats submit" X-Send-Pr-Version: gtk-send-pr 0.4.7 Cc: Subject: ports/98090: [patch] multimedia/gstreamer-plugins-good: Fix problems reading unicode id3 tags X-BeenThere: freebsd-ports-bugs@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Ports bug reports List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 29 May 2006 14:30:19 -0000 >Number: 98090 >Category: ports >Synopsis: [patch] multimedia/gstreamer-plugins-good: Fix problems reading unicode id3 tags >Confidential: no >Severity: non-critical >Priority: low >Responsible: freebsd-ports-bugs >State: open >Quarter: >Keywords: >Date-Required: >Class: update >Submitter-Id: current-users >Arrival-Date: Mon May 29 14:30:17 GMT 2006 >Closed-Date: >Last-Modified: >Originator: Khairil Yusof >Release: FreeBSD 6.1-RELEASE i386 >Organization: >Environment: System: FreeBSD 6.1-RELEASE #17: Sun May 14 11:28:23 ICT 2006 kaeru@wolverine.inigo-tech.com:/tmp/obj/usr/src/sys/WOLVERINE >Description: Fix obtained from upstream: http://bugzilla.gnome.org/show_bug.cgi?id=341774 * gst/id3demux/id3v2frames.c: (find_utf16_bom), (parse_insert_string_field), (parse_split_strings): Rework string parsing to always walk over BOM markers in UTF16 strings, using the endianness indicated by the innermost one, then trying the opposite endianness if that fails to convert to valid UTF-8. Fixes #341774 >How-To-Repeat: >Fix: --- patch-id3v2frames.c begins here --- diff -u -r1.16 id3v2frames.c --- gst/id3demux/id3v2frames.c 10 May 2006 13:51:01 -0000 1.16 +++ gst/id3demux/id3v2frames.c 15 May 2006 09:48:46 -0000 @@ -667,33 +667,21 @@ return result; } -static void -parse_insert_string_field (const gchar * encoding, gchar * data, gint data_size, - GArray * fields) -{ - gchar *field; - - field = g_convert (data, data_size, "UTF-8", encoding, NULL, NULL, NULL); - if (field && !g_utf8_validate (field, -1, NULL)) { - GST_DEBUG ("%s was bad UTF-8. Ignoring", field); - g_free (field); - field = NULL; - } - if (field) - g_array_append_val (fields, field); -} +static const gchar utf16enc[] = "UTF-16"; +static const gchar utf16leenc[] = "UTF-16LE"; +static const gchar utf16beenc[] = "UTF-16BE"; static gboolean -has_utf16_bom (gchar * data, const gchar ** p_in_encoding) +find_utf16_bom (gchar * data, const gchar ** p_in_encoding) { guint16 marker = (GST_READ_UINT8 (data) << 8) | GST_READ_UINT8 (data + 1); switch (marker) { case 0xFFFE: - *p_in_encoding = "UTF16LE"; + *p_in_encoding = utf16leenc; return TRUE; case 0xFEFF: - *p_in_encoding = "UTF16BE"; + *p_in_encoding = utf16beenc; return TRUE; default: break; @@ -702,6 +690,63 @@ } static void +parse_insert_string_field (guint8 encoding, gchar * data, gint data_size, + GArray * fields) +{ + gchar *field = NULL; + + switch (encoding) { + case ID3V2_ENCODING_UTF16: + case ID3V2_ENCODING_UTF16BE: + { + const gchar *in_encode; + + if (encoding == ID3V2_ENCODING_UTF16) + in_encode = utf16enc; + else + in_encode = utf16beenc; + + /* Sometimes we see strings with multiple BOM markers at the start. + * In that case, we assume the innermost one is correct. If that fails + * to produce valid UTF-8, we try the other endianness anyway */ + while (data_size > 2 && find_utf16_bom (data, &in_encode)) { + data += 2; /* skip BOM */ + data_size -= 2; + } + + field = g_convert (data, data_size, "UTF-8", in_encode, NULL, NULL, NULL); + + if (field == NULL || g_utf8_validate (field, -1, NULL) == FALSE) { + /* As a fallback, try interpreting UTF-16 in the other endianness */ + if (in_encode == utf16beenc) + field = g_convert (data, data_size, "UTF-8", utf16leenc, + NULL, NULL, NULL); + } + } + + break; + case ID3V2_ENCODING_ISO8859: + field = g_convert (data, data_size, "UTF-8", "ISO-8859-1", + NULL, NULL, NULL); + break; + default: + field = g_strndup (data, data_size); + break; + } + + if (field) { + if (g_utf8_validate (field, -1, NULL)) { + g_array_append_val (fields, field); + return; + } + + GST_DEBUG ("%s was bad UTF-8 after conversion from encoding %d. Ignoring", + field, encoding); + g_free (field); + } +} + +static void parse_split_strings (guint8 encoding, gchar * data, gint data_size, GArray ** out_fields) { @@ -715,13 +760,13 @@ case ID3V2_ENCODING_ISO8859: for (text_pos = 0; text_pos < data_size; text_pos++) { if (data[text_pos] == 0) { - parse_insert_string_field ("ISO-8859-1", data + prev, + parse_insert_string_field (encoding, data + prev, text_pos - prev + 1, fields); prev = text_pos + 1; } } if (data_size - prev > 0 && data[prev] != 0x00) { - parse_insert_string_field ("ISO-8859-1", data + prev, + parse_insert_string_field (encoding, data + prev, data_size - prev, fields); } @@ -729,34 +774,24 @@ case ID3V2_ENCODING_UTF8: for (prev = 0, text_pos = 0; text_pos < data_size; text_pos++) { if (data[text_pos] == '\0') { - parse_insert_string_field ("UTF-8", data + prev, + parse_insert_string_field (encoding, data + prev, text_pos - prev + 1, fields); prev = text_pos + 1; } } if (data_size - prev > 0 && data[prev] != 0x00) { - parse_insert_string_field ("UTF-8", data + prev, + parse_insert_string_field (encoding, data + prev, data_size - prev, fields); } break; case ID3V2_ENCODING_UTF16: case ID3V2_ENCODING_UTF16BE: { - const gchar *in_encode; - - if (encoding == ID3V2_ENCODING_UTF16) - in_encode = "UTF-16"; - else - in_encode = "UTF-16BE"; - /* Find '\0\0' terminator */ for (text_pos = 0; text_pos < data_size - 1; text_pos += 2) { if (data[text_pos] == '\0' && data[text_pos + 1] == '\0') { - if (has_utf16_bom (data + prev, &in_encode)) { - prev += 2; /* skip BOM */ - } /* found a delimiter */ - parse_insert_string_field (in_encode, data + prev, + parse_insert_string_field (encoding, data + prev, text_pos - prev + 2, fields); text_pos++; /* Advance to the 2nd NULL terminator */ prev = text_pos + 1; @@ -765,11 +800,8 @@ } if (data_size - prev > 1 && (data[prev] != 0x00 || data[prev + 1] != 0x00)) { - if (has_utf16_bom (data + prev, &in_encode)) { - prev += 2; /* skip BOM */ - } /* There were 2 or more non-null chars left, convert those too */ - parse_insert_string_field (in_encode, data + prev, + parse_insert_string_field (encoding, data + prev, data_size - prev, fields); } break; --- patch-id3v2frames.c ends here --- >Release-Note: >Audit-Trail: >Unformatted: