Skip site navigation (1) | Skip section navigation (2)
Date:      Fri, 20 Jun 2025 16:40:39 GMT
From:      Mark Johnston <markj@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 770fba248daf - main - localedef: Use consistent sorting order
Message-ID:  <202506201640.55KGed3U074668@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=770fba248daff6cc72ce0bfbd1f2a941c90dc99a

commit 770fba248daff6cc72ce0bfbd1f2a941c90dc99a
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2025-06-20 16:10:37 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2025-06-20 16:39:22 +0000

    localedef: Use consistent sorting order
    
    In several modules we build up an RB tree keyed by wide characters.
    wchar_t has different signedness on different platforms, so iteration
    over such a tree results in platform-dependent ordering.
    
    The ctype module uses this ordering when writing the output file, which
    creates reproducibility problems when comparing the results of cross
    builds and native builds (e.g., native amd64 vs. cross-building on
    arm64).
    
    Modify such comparisons to always be unsigned.  Introduce a helper
    function for this purpose.  In the other modules I believe the sort
    order does not affect program output.
    
    On systems with signed wchar_t, of the files in /usr/share/locale this
    only affects zh_CN.GB18030/LC_CTYPE.
    
    MFC after:      2 weeks
    Sponsored by:   The FreeBSD Foundation
    Sponsored by:   Klara, Inc.
    Differential Revision:  https://reviews.freebsd.org/D50756
---
 usr.bin/localedef/charmap.c   |  2 +-
 usr.bin/localedef/collate.c   |  2 +-
 usr.bin/localedef/ctype.c     |  2 +-
 usr.bin/localedef/localedef.h | 15 +++++++++++++++
 4 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/usr.bin/localedef/charmap.c b/usr.bin/localedef/charmap.c
index a8d723163e74..7fb40845aee3 100644
--- a/usr.bin/localedef/charmap.c
+++ b/usr.bin/localedef/charmap.c
@@ -236,7 +236,7 @@ cmap_compare_wc(const void *n1, const void *n2)
 	const charmap_t *c1 = n1;
 	const charmap_t *c2 = n2;
 
-	return ((c1->wc < c2->wc) ? -1 : (c1->wc > c2->wc) ? 1 : 0);
+	return (wchar_cmp(c1->wc, c2->wc));
 }
 
 void
diff --git a/usr.bin/localedef/collate.c b/usr.bin/localedef/collate.c
index 830235036044..3dc70c7adf94 100644
--- a/usr.bin/localedef/collate.c
+++ b/usr.bin/localedef/collate.c
@@ -422,7 +422,7 @@ collchar_compare(const void *n1, const void *n2)
 	wchar_t	k1 = ((const collchar_t *)n1)->wc;
 	wchar_t	k2 = ((const collchar_t *)n2)->wc;
 
-	return (k1 < k2 ? -1 : k1 > k2 ? 1 : 0);
+	return (wchar_cmp(k1, k2));
 }
 
 RB_GENERATE_STATIC(collchars, collchar, entry, collchar_compare);
diff --git a/usr.bin/localedef/ctype.c b/usr.bin/localedef/ctype.c
index ab7b76e57b2d..f86a267b0b32 100644
--- a/usr.bin/localedef/ctype.c
+++ b/usr.bin/localedef/ctype.c
@@ -93,7 +93,7 @@ ctype_compare(const void *n1, const void *n2)
 	const ctype_node_t *c1 = n1;
 	const ctype_node_t *c2 = n2;
 
-	return (c1->wc < c2->wc ? -1 : c1->wc > c2->wc ? 1 : 0);
+	return (wchar_cmp(c1->wc, c2->wc));
 }
 
 void
diff --git a/usr.bin/localedef/localedef.h b/usr.bin/localedef/localedef.h
index 34299033214c..4e5c1884a8fb 100644
--- a/usr.bin/localedef/localedef.h
+++ b/usr.bin/localedef/localedef.h
@@ -38,6 +38,7 @@
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <wchar.h>
 
 extern int com_char;
 extern int esc_char;
@@ -173,5 +174,19 @@ void werr(const char *, ...);
 const char *get_wide_encoding(void);
 int max_wide(void);
 
+/*
+ * Compare two wide characters for sorting: returns -1 if a < b, 1 if a > b and
+ * 0 if equal.  Both operands are cast to uint32_t before comparing so that the
+ * resulting order is the same no matter the signedness of wchar_t.
+ */
+static inline int
+wchar_cmp(const wchar_t a, const wchar_t b)
+{
+	return ((uint32_t)a < (uint32_t)b ? -1 :
+	    ((uint32_t)a > (uint32_t)b ? 1 : 0));
+}
+_Static_assert(sizeof(wchar_t) == sizeof(uint32_t),
+    "wchar_t must be 32 bits wide");
+
 //#define	_(x)	gettext(x)
 #define	INTERR	fprintf(stderr,"internal fault (%s:%d)\n", __FILE__, __LINE__)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202506201640.55KGed3U074668>