Date: Fri, 23 Jul 2021 15:07:01 GMT
From: Baptiste Daroussin <bapt@FreeBSD.org>
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject: git: fb25fdcaa34f - main - locales: only generate unicode locales
Message-ID: <202107231507.16NF71Vj044302@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by bapt: URL: https://cgit.FreeBSD.org/src/commit/?id=fb25fdcaa34f35a4c984b2da12f251fce3d75b0a commit fb25fdcaa34f35a4c984b2da12f251fce3d75b0a Author: Baptiste Daroussin <bapt@FreeBSD.org> AuthorDate: 2021-07-23 14:10:24 +0000 Commit: Baptiste Daroussin <bapt@FreeBSD.org> CommitDate: 2021-07-23 14:58:20 +0000 locales: only generate unicode locales --- tools/tools/locale/Makefile | 82 ++++++------------------------------ tools/tools/locale/etc/charmaps.xml | 47 --------------------- tools/tools/locale/tools/cldr2def.pl | 35 --------------- tools/tools/locale/tools/finalize | 34 +-------------- 4 files changed, 14 insertions(+), 184 deletions(-) diff --git a/tools/tools/locale/Makefile b/tools/tools/locale/Makefile index 92f890b2f4d3..0efca83a971e 100644 --- a/tools/tools/locale/Makefile +++ b/tools/tools/locale/Makefile @@ -32,33 +32,9 @@ tools-test: KNOWN= monetdef numericdef msgdef colldef ctypedef # timedef TYPES?= ${KNOWN} -COLLATION_SPECIAL?= \ - cs_CZ ISO8859-2 \ - da_DK ISO8859-1 \ - da_DK ISO8859-15 \ - hr_HR ISO8859-2 \ - hu_HU ISO8859-2 \ - nb_NO ISO8859-1 \ - nb_NO ISO8859-15 \ - sk_SK ISO8859-2 \ - sr_Latn_RS ISO8859-2 \ - sr_Cyrl_RS ISO8859-5 \ - zh_Hans_CN GB2312 \ - zh_Hans_CN eucCN \ - zh_Hant_TW Big5 \ - zh_Hans_CN GB18030 \ - zh_Hans_CN GBK \ - ja_JP eucJP \ - nn_NO ISO8859-15 \ - nn_NO ISO8859-1 - -.for area enc in ${COLLATION_SPECIAL} -COLLATIONS_SPECIAL_ENV+= ${area}.${enc} -.endfor SETENV= env -i \ PATH="${PATH}" \ TMPDIR="${TMPDIR}" \ - COLLATIONS_SPECIAL="${COLLATIONS_SPECIAL_ENV}" \ UNIDIR="${UNIDIR}" \ BASEDIR="${BASEDIR}" \ TOOLSDIR="${TOOLSDIR}" \ @@ -89,16 +65,22 @@ diff-${t}: .endfor install: -.for t in ${TYPES} +.for t in ${TYPES:Nctypedef} . 
if ${KNOWN:M${t}} install: install-${t} install-${t}: - cd ${LOCALESRCDIR}/${t} && \ + cd ${LOCALESRCDIR}/${t}_unicode && \ rm -f Makefile *.src && \ cd ${.OBJDIR} && \ - install -m 644 ${t}/* ${LOCALESRCDIR}/${t} + install -m 644 ${t}/* ${LOCALESRCDIR}/${t}_unicode . endif .endfor +install: install-ctypedef +install-ctypedef: + cd ${LOCALESRCDIR}/ctypedef && \ + rm -f C.UTF-8.src && \ + cd ${.OBJDIR} && \ + install -m 644 ctypedef/C.UTF-8.src ${LOCALESRCDIR}/ctypedef post-install: .for t in ${TYPES} @@ -121,15 +103,6 @@ build-${t}: ${t} ${SETENV} OUTBASEDIR="${.OBJDIR}/${t}" ${TOOLSDIR}/finalize ${t} .endfor -static-colldef: colldef -build-colldef: static-colldef - -static-colldef: -.for area enc in ${COLLATION_SPECIAL} - awk -f ${TOOLSDIR}/extract-colldef.awk \ - posix/${area}.${enc}.src > colldef.draft/${area}.${enc}.src -.endfor - BASE_LOCALES_OF_INTEREST?= \ af_ZA am_ET ar_AE ar_EG ar_JO ar_MA ar_QA ar_SA \ be_BY bg_BG ca_AD ca_ES ca_FR ca_IT \ @@ -147,35 +120,14 @@ BASE_LOCALES_OF_INTEREST?= \ th_TH lo_LA bo_IN my_MM pa_Guru_IN ka_GE chr_US \ km_KH shi_Tfng_MA ii_CN vai_Vaii_LR vi_VN -ENCODINGS= Big5 \ - CP1251 \ - CP866 \ - CP949 \ - eucCN \ - eucJP \ - eucKR \ - GB18030 \ - GB2312 \ - GBK \ - ISO8859-1 \ - ISO8859-13 \ - ISO8859-15 \ - ISO8859-2 \ - ISO8859-5 \ - ISO8859-7 \ - ISO8859-9 \ - KOI8-R \ - KOI8-U \ - SJIS \ - US-ASCII \ - UTF-8 \ +ENCODINGS= UTF-8 \ UTF-32 # CLDR files CLDRFILES_CORE= https://unicode.org/Public/cldr/35/core.zip CLDRFILES_KEY= https://unicode.org/Public/cldr/35/keyboards.zip CLDRFILES_TOOLS=https://unicode.org/Public/cldr/35/tools.zip -CLDRFILES_UCD= http://www.unicode.org/Public/zipped/latest/UCD.zip +CLDRFILES_UCD= http://www.unicode.org/Public/zipped/13.0.0/UCD.zip # fetch and extract targets ${UNIDIR}: @@ -206,8 +158,8 @@ build-tools: JAVA_CLDR= java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar -posix: posixcm post-posixcm posixsrc posixcol -.ORDER: posixcm post-posixcm posixsrc posixcol +posix: posixcm 
post-posixcm posixsrc +.ORDER: posixcm post-posixcm posixsrc ${UNIDIR}/posix: ln -s -f ../posix ${.TARGET} clean-posix: @@ -232,14 +184,6 @@ posix/${area}.UTF-8.src: ${JAVA_CLDR} org.unicode.cldr.posix.GeneratePOSIX \ -d posix -m ${area} -c UTF-8 .endfor -.for area encoding in ${COLLATION_SPECIAL} -posixcol: build-tools posix/${area}.${encoding}.src -.ORDER: build-tools posix/${area}.${encoding}.src -posix/${area}.${encoding}.src: - mkdir -p posix && \ - ${JAVA_CLDR} org.unicode.cldr.posix.GeneratePOSIX \ - -d posix -m ${area} -c ${encoding} -.endfor # generate widths.txt using the data from libut8proc GETWIDTHS=${TOOLSDIR}/getwidths diff --git a/tools/tools/locale/etc/charmaps.xml b/tools/tools/locale/etc/charmaps.xml index 52e80f2dee05..9d42b1e5247c 100644 --- a/tools/tools/locale/etc/charmaps.xml +++ b/tools/tools/locale/etc/charmaps.xml @@ -28,169 +28,122 @@ --> <language name="af" - encoding="ISO8859-1 ISO8859-15" countries="ZA" /> <language name="am" countries="ET" /> <!-- UTF-8 only --> <language name="ar" countries="AE EG JO MA QA SA" /> <language name="be" - encoding="CP1131 CP1251 ISO8859-5" countries="BY" /> <language name="bg" - encoding="CP1251" countries="BG" /> <language name="ca" fallback="ca_ES" - encoding="ISO8859-1 ISO8859-15" countries="AD ES FR IT" /> <!-- only ca_ES defined --> <language name="cs" - encoding="ISO8859-2" countries="CZ" /> <language name="da" - encoding="ISO8859-1 ISO8859-15" countries="DK" /> <language name="de" - encoding="ISO8859-1 ISO8859-15" countries="AT CH DE" /> <language name="el" - encoding="ISO8859-7" countries="GR" /> <language name="en" - encoding="ISO8859-1 ISO8859-15 US-ASCII" countries="GB" /> <language name="en" - encoding="ISO8859-1 ISO8859-15 US-ASCII" countries="AU CA NZ US ZA" /> <language name="en" - encoding="ISO8859-1 ISO8859-15" countries="IE" /> <language name="en" - encoding="ISO8859-1" countries="HK SG" /> <language name="en" countries="PH" /> <!-- UTF-8 only --> <language name="es" countries="CR" /> 
<!-- UTF-8 only --> <language name="es" - encoding="ISO8859-1 ISO8859-15" countries="ES" /> <language name="es" - encoding="ISO8859-1" countries="AR MX" /> <language name="et" - encoding="ISO8859-1 ISO8859-15" countries="EE" /> <language name="eu" - encoding="ISO8859-1 ISO8859-15" countries="ES" /> <language name="fi" - encoding="ISO8859-1 ISO8859-15" countries="FI" /> <language name="fr" - encoding="ISO8859-1 ISO8859-15" countries="BE CH FR" /> <language name="fr" - encoding="ISO8859-1 ISO8859-15" countries="CA" /> <language name="ga" countries="IE" /> <!-- UTF-8 only --> <language name="he" countries="IL" /> <language name="hi" - encoding="ISCII-DEV" countries="IN" /> <language name="hr" - encoding="ISO8859-2" countries="HR" /> <language name="hu" - encoding="ISO8859-2" countries="HU" /> <language name="hy" - encoding="ARMSCII-8" countries="AM" /> <language name="is" - encoding="ISO8859-1 ISO8859-15" countries="IS" /> <language name="it" - encoding="ISO8859-1 ISO8859-15" countries="CH IT" /> <language name="ja" - encoding="SJIS eucJP" countries="JP" /> <language name="kk" countries="KZ" /> <!-- PT154 not available, UTF-8 --> <language name="ko" - encoding="eucKR" - encoding_link="eucKR:CP949" countries="KR" /> <language name="lt" - encoding="ISO8859-13" countries="LT" /> <language name="lv" - encoding="ISO8859-13" countries="LV" /> <language name="mn" countries="MN" /> <language name="nb" - encoding="ISO8859-1 ISO8859-15" countries="NO" /> <language name="nl" - encoding="ISO8859-1 ISO8859-15" countries="BE NL" /> <language name="nn" - encoding="ISO8859-1 ISO8859-15" countries="NO" /> <language name="pl" - encoding="ISO8859-2" countries="PL" /> <language name="pt" - encoding="ISO8859-1 ISO8859-15" countries="PT" /> <language name="pt" - encoding="ISO8859-1" countries="BR" /> <language name="ro" - encoding="ISO8859-2" countries="RO" /> <language name="ru" - encoding="CP1251 CP866 ISO8859-5 KOI8-R" countries="RU" /> <language name="se" countries="NO FI" /> <language 
name="sk" - encoding="ISO8859-2" countries="SK" /> <language name="sl" - encoding="ISO8859-2" countries="SI" /> <language name="sr" family="Latn" - encoding="ISO8859-2" countries="RS" /> <language name="sr" family="Cyrl" - encoding="ISO8859-5" countries="RS" /> <language name="sv" - encoding="ISO8859-1 ISO8859-15" countries="SE FI" /> <language name="tr" - encoding="ISO8859-9" countries="TR" /> <language name="uk" - encoding="CP1251 ISO8859-5 KOI8-U" countries="UA" /> <language name="zh" family="Hans" - encoding="GB18030 GB2312 GBK eucCN" countries="CN" /> <language name="zh" family="Hant" countries="HK" /> <language name="zh" family="Hant" - encoding="Big5" countries="TW" /> </languages> diff --git a/tools/tools/locale/tools/cldr2def.pl b/tools/tools/locale/tools/cldr2def.pl index fd475db714a0..70e0bdad525b 100755 --- a/tools/tools/locale/tools/cldr2def.pl +++ b/tools/tools/locale/tools/cldr2def.pl @@ -65,7 +65,6 @@ my %values = (); my %hashtable = (); my %languages = (); my %translations = (); -my %encodings = (); my %alternativemonths = (); get_languages(); @@ -74,7 +73,6 @@ $utfmap{'UTF-8'} = {}; $utfmap{'UTF-32'} = {}; get_utfmap("$UNIDIR/posix/$DEFENCODING.cm", $utfmap{'UTF-8'}); get_utfmap("$UNIDIR/posix/UTF-32.cm", $utfmap{'UTF-32'}); -get_encodings("$ETCDIR/charmaps"); my %keys = (); tie(%keys, "Tie::IxHash"); @@ -384,44 +382,11 @@ sub resolve_enc_addition { return $ret; } -sub get_encodings { - my $dir = shift; - foreach my $e (sort(keys(%encodings))) { - if (!open(FIN, "$dir/$e.TXT")) { - print "Cannot open charmap for $e\n"; - next; - - } - $encodings{$e} = 1; - my @lines = <FIN>; - close(FIN); - chomp(@lines); - foreach my $l (@lines) { - $l =~ s/\r//; - next if ($l eq ""); - - my @a = split(" ", $l); - next if ($#a < 1); - next if ($a[0] =~ /^\#/ or $a[1] =~ /^\#/); - next if ($a[0] eq '' or $a[1] eq ''); - - $a[0] = resolve_enc_addition($a[0]); # local - $a[1] = resolve_enc_addition($a[1]); # UTF-32 - my $u32 = sprintf("%08X", hex($a[1])); -# print 
STDERR "$a[1] => $u32\n"; - - # Use UTF-32 as the indices. - $convertors{$e}{$u32} = uc($a[0]); - } - } -} - sub get_languages { my %data = get_xmldata($ETCDIR); %languages = %{$data{L}}; %translations = %{$data{T}}; %alternativemonths = %{$data{AM}}; - %encodings = %{$data{E}}; } sub transform_ctypes { diff --git a/tools/tools/locale/tools/finalize b/tools/tools/locale/tools/finalize index 88dfcad0cb24..207b97ff3cb5 100755 --- a/tools/tools/locale/tools/finalize +++ b/tools/tools/locale/tools/finalize @@ -82,7 +82,6 @@ for i in *_*_*.*.src; do nname=`echo $oldname | awk '{ split($0, a, "_"); print a[1]"_"a[3]"@"a[2];} '` mv -f ${oldname}.src ${nname}.src sed -i '' -e "s/${oldname}/${nname}/g" Makefile - COLLATIONS_SPECIAL=$(echo ${COLLATIONS_SPECIAL} | sed -e "s/${oldname}/${nname}/g") done # For variable without @modifier ambiguity do not keep the @modifier @@ -95,7 +94,6 @@ for i in *@*.src; do if [ $(ls ${shortname}@* | wc -l) -eq 1 ] ; then mv -f $i ${shortname}.src sed -i '' -e "s/${oldname}/${shortname}/g" Makefile - COLLATIONS_SPECIAL=$(echo ${COLLATIONS_SPECIAL} | sed -e "s/${oldname}/${shortname}/g") fi done @@ -106,7 +104,6 @@ for i in *@Latn.src; do fi mv -f ${i} ${i%@*}@latin.src sed -i '' -e "s/${i%.*}/${i%@*}@latin/g" Makefile - COLLATIONS_SPECIAL=$(echo ${COLLATIONS_SPECIAL} | sed -e "s/${i%.*}/${i%@*}@latin/g") done for i in *@Cyrl.src; do @@ -115,7 +112,6 @@ for i in *@Cyrl.src; do fi mv -f ${i} ${i%@*}@cyrillic.src sed -i '' -e "s/${i%.*}/${i%@*}@cyrillic/g" Makefile - COLLATIONS_SPECIAL=$(echo ${COLLATIONS_SPECIAL} | sed -e "s/${i%.*}/${i%@*}@cyrillic/g") done # On locales with multiple modifiers rename the "default" version without the @modifier @@ -150,30 +146,6 @@ then /usr/bin/sed -E -e 's/[ ]+/ /g' \ ${UNIDIR}/posix/UTF-8.cm \ > ${ETCDIR}/final-maps/map.UTF-8 - /usr/bin/sed -E -e 's/[ ]+/ /g' \ - ${UNIDIR}/posix/eucCN.cm \ - > ${ETCDIR}/final-maps/map.eucCN - /usr/bin/sed -E -e 's/[ ]+/ /g' \ - ${UNIDIR}/posix/eucCN.cm \ - > 
${ETCDIR}/final-maps/map.GB2312 - - # GB18030 and Big5 are pre-generated from CLDR data - CHARMAPS="ARMSCII-8 CP1131 CP1251 \ - CP866 GBK ISCII-DEV ISO8859-1 \ - ISO8859-13 ISO8859-15 ISO8859-2 ISO8859-4 \ - ISO8859-5 ISO8859-7 ISO8859-9 KOI8-R KOI8-U \ - PT154 SJIS US-ASCII eucJP eucKR" - - for map in ${CHARMAPS} - do - encoding=${map} - env ETCDIR="${ETCDIR}" \ - /usr/local/bin/perl ${TOOLSDIR}/convert_map.pl \ - ${ETCDIR}/charmaps/${map}.TXT ${encoding} \ - | /usr/bin/sed -E -e 's/ +/ /g' \ - > ${ETCDIR}/final-maps/map.${map} - echo map ${map} converted. - done elif [ $1 = "colldef" ] then @@ -190,13 +162,9 @@ then sed -i '' "/^SAME.*$line$/d" ${old}/Makefile done echo "" >> ${TEMP4} - for enc in ${COLLATIONS_SPECIAL}; do - sed -i '' "/^.*${enc}$/d" ${TEMP4} - echo "LOCALES+= ${enc}" >> ${TEMP4} - done keep=$(cat ${TEMP} | awk '{ print $2 }') - for original in ${keep} ${COLLATIONS_SPECIAL} + for original in ${keep} do cp ${old}/${original}.src ${new}/ done
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202107231507.16NF71Vj044302>