Date: Sun, 26 Feb 2006 03:05:24 +0800 (CST) From: Xin LI <delphij@FreeBSD.org> To: FreeBSD-gnats-submit@FreeBSD.org Cc: doceng@FreeBSD.org, intron@intron.ac, hrs@FreeBSD.org, murray@FreeBSD.org, portmgr@FreeBSD.org Subject: ports/93836: [NEW PORT] chinese/zh-docproj Message-ID: <200602251905.k1PJ5OhG056456@tarsier.delphij.net> Resent-Message-ID: <200602251910.k1PJA45x010670@freefall.freebsd.org>
next in thread | raw e-mail | index | archive | help
>Number: 93836 >Category: ports >Synopsis: [NEW PORT] chinese/zh-docproj >Confidential: no >Severity: non-critical >Priority: low >Responsible: freebsd-ports-bugs >State: open >Quarter: >Keywords: >Date-Required: >Class: change-request >Submitter-Id: current-users >Arrival-Date: Sat Feb 25 19:10:04 GMT 2006 >Closed-Date: >Last-Modified: >Originator: Xin LI >Release: FreeBSD 6.1-PRERELEASE i386 >Organization: The FreeBSD Simplified Chinese Project >Environment: System: FreeBSD tarsier.delphij.net 6.1-PRERELEASE FreeBSD 6.1-PRERELEASE #9: Sat Feb 25 23:49:38 CST 2006 delphij@tarsier.delphij.net:/usr/obj/usr/src/sys/TARSIER i386 >Description: zh-docproj is a set of supporting tools that are used to correctly generate PDFs for Far East languages. We have used these tools to help generate PDFs for the FreeBSD Chinese Project. The current snapshot of zh-docproj supports the GB2312, GBK, BIG5, EUCJP, EUCKR and UTF-8 encodings. The code was taken from the FreeBSD Simplified Chinese Project CVS; most of it is by intron <intron at intron dot ac>. I would be happy if portmgr@ would approve adding this port to the tree before the final release, which would make it possible to correctly build PDFs of documentation translated into Far East languages. >How-To-Repeat: >Fix: --- zh-docproj.shar begins here --- # This is a shell archive. Save it in a file, remove anything before # this line, and then unpack it by entering "sh file". Note, it may # create directories; files and directories will be owned by you and # have default permissions. 
# # This archive contains: # # Makefile # pkg-descr # pkg-plist # src # src/Makefile # src/cjktexsty # src/cjktexsty/cjktexsty.l # src/cjktexsty/Makefile # src/fixrtf # src/fixrtf/Makefile # src/fixrtf/fixrtf.l # echo x - Makefile sed 's/^X//' >Makefile << 'END-of-Makefile' X# New ports collection makefile for: zh-docproj X# Date created: 26 Feb 2006 X# Whom: Xin LI <delphij@FreeBSD.org> X# X# $FreeBSD$ X# X# This port is self contained in the src directory. X# X XPORTNAME= docproj XPORTVERSION= 0.1.20060226 XCATEGORIES= chinese XMASTER_SITES= # none XPKGNAMEPREFIX= zh- XDISTFILES= # none X X# Updates from intron@intron.ac should also be considered X# as maintainer updates. XMAINTAINER= delphij@FreeBSD.org XCOMMENT= Supportive tools for Chinese docproj build X XLIB_DEPENDS= iconv:${PORTSDIR}/converters/libiconv \ X png.5:${PORTSDIR}/graphics/png X XWRKSRC= ${WRKDIR}/src X XSRC= ${.CURDIR}/src X Xdo-fetch: X @${DO_NADA} X Xpre-patch: X @${CP} -R ${SRC} ${WRKDIR} X X.include <bsd.port.mk> END-of-Makefile echo x - pkg-descr sed 's/^X//' >pkg-descr << 'END-of-pkg-descr' Xzh-docproj is a set of utilities that is used to build docproj PDFs. XThis utility is maintained by the FreeBSD Simplified Chinese Project. X XWWW: http://www.freebsd.org.cn END-of-pkg-descr echo x - pkg-plist sed 's/^X//' >pkg-plist << 'END-of-pkg-plist' Xbin/cjktexsty Xbin/fixrtf END-of-pkg-plist echo c - src mkdir -p src > /dev/null 2>&1 echo x - src/Makefile sed 's/^X//' >src/Makefile << 'END-of-src/Makefile' X# $FreeBSD$ X XSUBDIR= cjktexsty fixrtf X X.include <bsd.subdir.mk> END-of-src/Makefile echo c - src/cjktexsty mkdir -p src/cjktexsty > /dev/null 2>&1 echo x - src/cjktexsty/cjktexsty.l sed 's/^X//' >src/cjktexsty/cjktexsty.l << 'END-of-src/cjktexsty/cjktexsty.l' X%{ X/*- X * Copyright (c) 2005, 2006 intron <intron@intron.ac>. All rights reserved. X * Copyright (c) 2005, 2006 The FreeBSD Simplified Chinese Project. X * All rights reserved. 
X * X * This code is derived from software contributed to The FreeBSD Simplified X * Chinese Project by intron. X * X * Redistribution and use in source and binary forms, with or without X * modification, are permitted provided that the following conditions X * are met: X * 1. Redistributions of source code must retain the above copyright X * notice, this list of conditions and the following disclaimer. X * 2. Redistributions in binary form must reproduce the above copyright X * notice, this list of conditions and the following disclaimer in the X * documentation and/or other materials provided with the distribution. X * X * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND X * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE X * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE X * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE X * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL X * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS X * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) X * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT X * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY X * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF X * SUCH DAMAGE. 
X *
X * CNPROJ: doc/zh_CN.GB2312/share/mk/cjktexsty.lex,v 1.1.1000.40 2006/02/19 20:32:32 intron Exp
X */
X
X#include <sys/cdefs.h>
X__FBSDID("$FreeBSD$");
X
X#include <err.h>
X#include <stdio.h>
X#include <string.h>
X#include <unistd.h>
X#include <iconv.h>
X
X/* iconv name of the TeX source encoding, CJK-LaTeX encoding name, CJK font name. */
Xchar texencoding[128]="",*cjkencoding=NULL,cjkfont[128]="";
X/* Conversion descriptor used by transcode(); opened in main(). */
Xiconv_t iconvhandle;
X/* Non-zero when option -c was given: \usepackage{ccmap} is emitted. */
Xint ccmap_enable=0;
X
X/* Terminate the program, reporting the current input line number. */
Xvoid
Xerrexit(void)
X{
X
X    errx(1, "Error: line %d", yylineno);
X}
X
X/*
X * Print the replacement for one \Character{NNNNN} token.
X *
X * "ch" is the matched token text; NNNNN is read as a decimal code point and
X * handed to iconv as a single UCS-4 big-endian unit.
X *  - If the conversion fails, a LaTeX substitute from the table below is
X *    printed, or "??" together with a warning when none is listed.
X *  - If the conversion yields exactly two bytes and one of them is listed
X *    in the TeX special character set below, \CJKchar{b0}{b1} is printed
X *    instead of the raw bytes.
X *  - Otherwise the converted bytes are printed as they are.
X * The program exits through errexit() when the token cannot be parsed.
X */
Xvoid
Xtranscode(char *ch)
X{
X    /* LaTeX substitutes for code points the target encoding cannot hold. */
X    static const struct {
X        int code;
X        const char *tex;
X    } substitutes[] = {
X        { 8212, "\\ensuremath{-}" },
X        { 8226, "\\ensuremath{\\bullet}" },
X        { 8482, "\\ensuremath{^{\\mathrm{TM}}}" },
X        { 10122, "{\\large\\ding{202}}" },
X        { 10123, "{\\large\\ding{203}}" },
X        { 10124, "{\\large\\ding{204}}" },
X        { 10125, "{\\large\\ding{205}}" },
X        { 10126, "{\\large\\ding{206}}" },
X        { 10127, "{\\large\\ding{207}}" },
X        { 10128, "{\\large\\ding{208}}" },
X        { 10129, "{\\large\\ding{209}}" },
X        { 10130, "{\\large\\ding{210}}" },
X        { 10131, "{\\large\\ding{211}}" },
X        { 10132, "\\ensuremath{\\rightarrow}" },
X        { 65533, "{\\large\\ding{96}}" }
X    };
X    char *pchar,*pout,input[16],output[128];
X    const char *pin,*text;
X    int c;
X    size_t i,lin,lout,outlen;
X
X    pchar=strstr(ch,"{");
X    if(pchar==NULL)
X        errexit();
X    if(sscanf(pchar+1,"%d",&c)!=1)
X        errexit();
X
X    /* UCS-4 big endian, including not only Basic Multilingual Plane */
X    input[0]=(c&0xff000000)>>24;
X    input[1]=(c&0xff0000)>>16;
X    input[2]=(c&0xff00)>>8;
X    input[3]=(c&0xff);
X    pin=input;
X    lin=4;
X
X    pout=output;
X    lout=sizeof(output)-1; /* Always keep room for the terminator '\0' */
X
X    if(iconv(iconvhandle,&pin,&lin,&pout,&lout)==(size_t)-1 || lin!=0) {
X        /*
X         * Put the descriptor back into its initial state so that the
X         * failed conversion cannot affect the next character.
X         */
X        iconv(iconvhandle,NULL,NULL,NULL,NULL);
X
X        text=NULL;
X        for(i=0;i<sizeof(substitutes)/sizeof(substitutes[0]);i++) {
X            if(substitutes[i].code==c) {
X                text=substitutes[i].tex;
X                break;
X            }
X        }
X        if(text==NULL) {
X            warnx("Unable to find a substitute for UNICODE character &#%d;", c);
X            text="??";
X        }
X    } else {
X        outlen=(sizeof(output)-1)-lout;
X        output[outlen]=0;
X
X        if(outlen==2 && strcspn(output,"\\$&%#@{}^_~\x80")!=outlen)
X        { /* TeX special character */
X            /* Both byte values are read before snprintf() overwrites output. */
X            snprintf(output,sizeof(output),"\\CJKchar{%u}{%u}",
X                (unsigned int)(unsigned char)output[0],
X                (unsigned int)(unsigned char)output[1]
X                );
X        }
X        text=output;
X    }
X
X    printf("%s",text);
X}
X
X%}
X
X%option yylineno
X%option noyywrap
X
Xfotbegin \\FOT\{[^}]*\}
Xfotend \\endFOT\{[^}]*\}
Xcjk \\Character\{[0-9]{1,5}\}
X
X%%
X
X{fotbegin} {
X        /*
X         * A confusing but practical structure:
X         *
X         * \usepackage{CJK}
X         * \begin{CJK*}{GB}{song}
X         * \FOT{3}
X         *
X         * ...
X         *
X         * \end{CJK*}
X         * \endFOT{}
X         *
X         * The macro call \begin{CJK*} must be put before
X         * \FOT, or generated PDF will include many "@".
X         */
X        printf("\\usepackage{textcomp}\n");
X        printf("\\usepackage{pifont}\n");
X        printf("\\usepackage{wasysym}\n");
X        printf("\\usepackage{CJK}\n");
X        if(ccmap_enable) printf("\\usepackage{ccmap}\n");
X        printf("\\hypersetup{CJKbookmarks=true,hypertex,pdfauthor={FreeBSD Documentation Project}}\n");
X        printf("\\begin{CJK*}{%s}{%s}\n%s\n",cjkencoding,cjkfont,yytext);
X        }
X{fotend} {
X        /*
X         * \FOT does NOT include \begin{document},
X         * while \endFOT includes \end{document} explicitly.
X         * Thus, \endFOT should NOT be put between
X         * \begin{CJK*} and \end{CJK*},
X         * whether there is a \FOT between them or not.
X */ X printf("\n\\end{CJK*}%s\n",yytext); X } X{cjk} { transcode(yytext); } X X[\xA0] { printf("{\\nobreakspace}"); } X[\xA1] { printf("{\\textexclamdown}"); } X[\xA2] { printf("{\\textcent}"); } X[\xA3] { printf("{\\pounds}"); } X[\xA4] { printf("{\\textcurrency}"); } X[\xA5] { printf("{\\textyen}"); } X[\xA6] { printf("{\\textbrokenbar}"); } X[\xA7] { printf("{\\S}"); } X[\xA8] { printf("{\\\"{}}"); } X[\xA9] { printf("{\\copyright}"); } X[\xAA] { printf("{\\textordfeminine}"); } X[\xAB] { printf("\\ensuremath{_{^{\\ll}}}"); } X[\xAC] { printf("\\ensuremath{\\lnot}"); } X[\xAD] { printf("{-}"); } X[\xAE] { printf("{\\textregistered}"); } X[\xAF] { printf("\\ensuremath{^{-}}"); } X[\xB0] { printf("{\\textdegree}"); } X[\xB1] { printf("\\ensuremath{\\pm}"); } X[\xB2] { printf("\\ensuremath{^{2}}"); } X[\xB3] { printf("\\ensuremath{^{3}}"); } X[\xB4] { printf("\\ensuremath{'}"); } X[\xB5] { printf("\\ensuremath{\\mu}"); } X[\xB6] { printf("{\\P}"); } X[\xB7] { printf("{\\ifmmode\\cdot\\else\\textperiodcentered\\fi}"); } X[\xB8] { printf("\\c{}"); } X[\xB9] { printf("\\ensuremath{^{1}}"); } X[\xBA] { printf("{\\textordmasculine}"); } X[\xBB] { printf("\\ensuremath{_{^{\\gg}}}"); } X[\xBC] { printf("{\\textonequarter}"); } X[\xBD] { printf("{\\textonehalf}"); } X[\xBE] { printf("{\\textthreequarters}"); } X[\xBF] { printf("{\\textquestiondown}"); } X[\xC0] { printf("\\ensuremath{\\grave{\\mathrm{A}}}"); } X[\xC1] { printf("\\ensuremath{\\acute{\\mathrm{A}}}"); } X[\xC2] { printf("{\\^A}"); } X[\xC3] { printf("{\\~A}"); } X[\xC4] { printf("{\\\"A}"); } X[\xC5] { printf("{\\AA}"); } X[\xC6] { printf("{\\AE}"); } X[\xC7] { printf("{\\c C}"); } X[\xC8] { printf("\\ensuremath{\\grave{\\mathrm{E}}}"); } X[\xC9] { printf("\\ensuremath{\\acute{\\mathrm{E}}}"); } X[\xCA] { printf("{\\^E}"); } X[\xCB] { printf("{\\\"E}"); } X[\xCC] { printf("\\ensuremath{\\grave{\\mathrm{I}}}"); } X[\xCD] { printf("\\ensuremath{\\acute{\\mathrm{I}}}"); } X[\xCE] { printf("{\\^I}"); } X[\xCF] { 
printf("{\\\"I}"); } X[\xD0] { printf("{\\DH}"); } X[\xD1] { printf("{\\~N}"); } X[\xD2] { printf("\\ensuremath{\\grave{\\mathrm{O}}}"); } X[\xD3] { printf("\\ensuremath{\\acute{\\mathrm{O}}}"); } X[\xD4] { printf("{\\^O}"); } X[\xD5] { printf("{\\~O}"); } X[\xD6] { printf("{\\\"O}"); } X[\xD7] { printf("\\ensuremath{\\times}"); } X[\xD8] { printf("{\\O}"); } X[\xD9] { printf("\\ensuremath{\\grave{\\mathrm{U}}}"); } X[\xDA] { printf("\\ensuremath{\\acute{\\mathrm{U}}}"); } X[\xDB] { printf("{\\^U}"); } X[\xDC] { printf("{\\\"U}"); } X[\xDD] { printf("\\ensuremath{\\acute{\\mathrm{Y}}}"); } X[\xDE] { printf("{\\Thorn}"); } X[\xDF] { printf("{\\ss}"); } X[\xE0] { printf("\\ensuremath{\\grave{\\mathrm{a}}}"); } X[\xE1] { printf("\\ensuremath{\\acute{\\mathrm{a}}}"); } X[\xE2] { printf("{\\^a}"); } X[\xE3] { printf("{\\~a}"); } X[\xE4] { printf("{\\\"a}"); } X[\xE5] { printf("{\\aa}"); } X[\xE6] { printf("{\\ae}"); } X[\xE7] { printf("{\\c c}"); } X[\xE8] { printf("\\ensuremath{\\grave{\\mathrm{e}}}"); } X[\xE9] { printf("\\ensuremath{\\acute{\\mathrm{e}}}"); } X[\xEA] { printf("{\\^e}"); } X[\xEB] { printf("{\\\"e}"); } X[\xEC] { printf("\\ensuremath{\\grave{\\mathrm{\\i}}}"); } X[\xED] { printf("\\ensuremath{\\acute{\\mathrm{\\i}}}"); } X[\xEE] { printf("{\\^\\i}"); } X[\xEF] { printf("{\\\"\\i}"); } X[\xF0] { printf("{\\dh}"); } X[\xF1] { printf("{\\~n}"); } X[\xF2] { printf("\\ensuremath{\\grave{\\mathrm{o}}}"); } X[\xF3] { printf("\\ensuremath{\\acute{\\mathrm{o}}}"); } X[\xF4] { printf("{\\^o}"); } X[\xF5] { printf("{\\~o}"); } X[\xF6] { printf("{\\\"o}"); } X[\xF7] { printf("\\ensuremath{\\div}"); } X[\xF8] { printf("{\\o}"); } X[\xF9] { printf("\\ensuremath{\\grave{\\mathrm{u}}}"); } X[\xFA] { printf("\\ensuremath{\\acute{\\mathrm{u}}}"); } X[\xFB] { printf("{\\^u}"); } X[\xFC] { printf("{\\\"u}"); } X[\xFD] { printf("\\ensuremath{\\acute{\\mathrm{y}}}"); } X[\xFE] { printf("{\\thorn}"); } X[\xFF] { printf("{\\\"y}"); } X X[\xa0-\xff] { X warnx("Unable to find 
a substitute for ISO8859-1 character \\x%X",
X            (unsigned int)(*((unsigned char *)yytext)));
X        printf("?");
X        }
X
X%%
X
X/* Print the command line synopsis and the supported encodings to stderr. */
Xvoid printusage()
X{
X    fprintf(stderr, "Usage: cjktexsty [ -c ] -e encoding -f fontname\n"
X        " Convert TeX source including \\Character{xxxxx} generated by\n"
X        " Jade/OpenJade into what CJK-LaTeX can process.\n"
X        " \n"
X        "NOTE: Jade/OpenJade supports EUC-JP natively. Thus, this tool SHOULD NOT be\n"
X        " used in this case. This tool treats all bytes larger than 0xa0 as\n"
X        " ISO 8859-1 characters, and converts \\Character{xxxxx} into encoding\n"
X        " that CJK-LaTeX can process.\n"
X        " \n"
X        "Options:\n"
X        " -c\n"
X        " Use ccmap.sty for PDFTeX to generate text-copyable CJK PDF.\n"
X        " The package ccmap.sty is written by Wenchang Sun and Linbo Zhang.\n"
X        " See also ftp://ftp.cc.ac.cn/pub/cct/ for details.\n"
X        " -e encoding\n"
X        " Specify TeX source encoding for CJK-LaTeX.\n"
X        " -f fontname\n"
X        " Specify font name in CJK macro call, such as\n"
X        " \\begin{CJK*}{encoding}{font}.\n"
X        " \n"
X        "CJK-LaTeX supported combinations by default:\n"
X        " <TeX source encoding> <CJK encoding name> <CJK font name>\n"
X        " ------------------------------------------------------------\n"
X        " GB2312 GB song\n"
X        " GBK GBK song\n"
X        " BIG5 Bg5 bsmi\n"
X        " EUCJP JIS min\n"
X        " EUCKR KS \n"
X        " UTF-8 UTF8 song\n"
X        );
X}
X
X/*
X * Parse the options, open the UCS-4BE -> TeX-encoding converter and run
X * the scanner over standard input.
X *
X * Returns 1 after printing the usage text when an option is unknown or no
X * supported encoding was given with -e; exits through err() when iconv has
X * no converter for the requested encoding; returns 0 otherwise.
X */
Xint
Xmain(int argc, char *argv[])
X{
X    int ch;
X
X    while ((ch = getopt(argc, argv, "ce:f:")) != -1)
X    {
X        switch (ch)
X        {
X        case 'c':
X            ccmap_enable=1;
X            break;
X        case 'e':
X            /* Map the iconv encoding name to the CJK-LaTeX encoding name. */
X            if(strcmp(optarg,"GB2312")==0) cjkencoding="GB";
X            else if(strcmp(optarg,"GBK")==0) cjkencoding="GBK";
X            else if(strcmp(optarg,"GB18030")==0) cjkencoding="GBK"; /* Not supported by CJK yet */
X            else if(strcmp(optarg,"BIG5")==0) cjkencoding="Bg5";
X            else if(strcmp(optarg,"EUCJP")==0) cjkencoding="JIS";
X            else if(strcmp(optarg,"EUCKR")==0) cjkencoding="KS";
X            else if(strcmp(optarg,"UTF-8")==0) cjkencoding="UTF8";
X            else cjkencoding=NULL;
X            if(cjkencoding!=NULL)
X                strlcpy(texencoding,optarg,sizeof(texencoding));
X            break;
X        case 'f':
X            strlcpy(cjkfont,optarg,sizeof(cjkfont));
X            break;
X        default:
X            printusage();
X            return 1;
X            break;
X        }
X    }
X
X    if(cjkencoding==NULL)
X    {
X        printusage();
X        return 1;
X    }
X
X    iconvhandle=iconv_open(texencoding,"UCS-4BE");
X    /*
X     * iconv_open() reports failure with (iconv_t)-1; that value must never
X     * reach iconv() in transcode().
X     */
X    if(iconvhandle==(iconv_t)-1)
X        err(1, "iconv_open(\"%s\", \"UCS-4BE\")", texencoding);
X    yylex();
X    iconv_close(iconvhandle);
X    return 0;
X}
END-of-src/cjktexsty/cjktexsty.l
echo x - src/cjktexsty/Makefile
sed 's/^X//' >src/cjktexsty/Makefile << 'END-of-src/cjktexsty/Makefile'
X# $FreeBSD$
X
XPROG= cjktexsty
XSRCS= cjktexsty.l
X
XPREFIX?= /usr/local
XBINDIR= ${PREFIX}/bin
XCFLAGS+=-I${PREFIX}/include
XLDADD= -L${PREFIX}/lib -liconv
XNO_MAN=
XNOMAN=
X
X.include <bsd.prog.mk>
END-of-src/cjktexsty/Makefile
echo c - src/fixrtf
mkdir -p src/fixrtf > /dev/null 2>&1
echo x - src/fixrtf/Makefile
sed 's/^X//' >src/fixrtf/Makefile << 'END-of-src/fixrtf/Makefile'
X# $FreeBSD$
X
XPROG= fixrtf
XSRCS= fixrtf.l
X
XPREFIX?= /usr/local
XBINDIR= ${PREFIX}/bin
XCFLAGS+=-I${PREFIX}/include
XLDADD= -L${PREFIX}/lib -lpng
XNO_MAN=
XNOMAN=
X
X.include <bsd.prog.mk>
END-of-src/fixrtf/Makefile
echo x - src/fixrtf/fixrtf.l
sed 's/^X//' >src/fixrtf/fixrtf.l << 'END-of-src/fixrtf/fixrtf.l'
X%{
X/*-
X * Copyright (c) 2005, 2006 intron <intron@intron.ac>. All rights reserved.
X * Copyright (c) 2005, 2006 The FreeBSD Simplified Chinese Project.
X * All rights reserved.
X *
X * This code is derived from software contributed to The FreeBSD Simplified
X * Chinese Project by intron.
X *
X * Redistribution and use in source and binary forms, with or without
X * modification, are permitted provided that the following conditions
X * are met:
X * 1. Redistributions of source code must retain the above copyright
X * notice, this list of conditions and the following disclaimer.
X * 2. Redistributions in binary form must reproduce the above copyright
X * notice, this list of conditions and the following disclaimer in the
X * documentation and/or other materials provided with the distribution.
X * X * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND X * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE X * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE X * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE X * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL X * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS X * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) X * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT X * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY X * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF X * SUCH DAMAGE. X * X * From CNPROJ: doc/zh_CN.GB2312/share/mk/fixrtf.lex,v 1.1.1000.20 2006/02/19 10:21:40 intron X */ X X#include <sys/cdefs.h> X__FBSDID("$FreeBSD$"); X X#include <err.h> X#include <assert.h> X#include <stdio.h> X#include <string.h> X#include <sys/param.h> X#include <stdlib.h> X#include <unistd.h> X#include <time.h> X#include <png.h> X X/* X * This program is used to fix RTF: X * 1. Embed PNGs into RTF. X * 2. Embed FreeBSD-specific information into RTF, such as organization name, X * building time. But unfortunately, so far only Microsoft Word can read X * them. In contrast, Microsoft Word Viewer and OpenOffice even cannot read X * this kind of information from RTF created by Microsoft Word and X * OpenOffice. (Option: -i) X * 3. Do some locale-specific fixing. (Option: -e <encoding>) X * X * See also Rich Text Format (RTF) Specification: X * 1. Version 1.8 (Microsoft Word 2003) X * http://www.microsoft.com/downloads/details.aspx?familyid=ac57de32-17f0-4b46-9e4e-467ef9bc5540&displaylang=en X * 2. Version 1.7 (Microsoft Word 2002) X * http://support.microsoft.com/kb/q86999/ X * 3. 
Version 1.6 (Microsoft Word 2000)
X * http://msdn.microsoft.com/library/en-us/dnrtfspec/html/rtfspec.asp
X */
X
X
X/* Non-zero when option -p was given: embed linked PNG files. */
Xint embedpng_enable=0;
X
X/* See also http://msdn.microsoft.com/library/en-us/intl/unicode_81rn.asp */
X#define ENCODING_UNKNOWN 0
X#define ENCODING_GB2312 936
X#define ENCODING_GB18030 54936
X#define ENCODING_BIG5 950
X
X/* Encoding selected with option -e; ENCODING_UNKNOWN disables charset fixing. */
Xint encoding=ENCODING_UNKNOWN;
X
X
Xint fetchinfo_enable=0; /* FALSE */
X
X
X#define MY_BUFFER_SIZE 3072
X#define MY_BUFFER_LIMIT 2048
X
X/* MY_BUFFER_LIMIT is smaller than MY_BUFFER_SIZE, leaving some headroom. */
X
X/*
X * "mybuffer" is used to cache RTF stream
X * while fetching book/article information.
X */
Xsize_t mybufferlength=0;
Xchar mybuffer[MY_BUFFER_SIZE];
X
X
X#define INFO_TITLE 0
X#define INFO_AUTHOR 1
X
X/* To store fetched book/article information */
Xstruct
X{
X    size_t length;
X    char text[MY_BUFFER_SIZE];
X} *pinfobuf=NULL,infobuf[]=
X{
X    {0,""},
X    {0,""}
X};
X
X/*
X * Replace an INCLUDEPICTURE field group with an embedded \pict group.
X *
X * "field" is the matched RTF field group text.  The quoted file name that
X * follows INCLUDEPICTURE is extracted; if it names a readable PNG file, a
X * {\pict\pngblip...} group holding the file's bytes as hexadecimal text is
X * printed.  In every other case a warning is issued and the field text is
X * printed unchanged, so the link stays in the RTF.  A file name that does
X * not fit into PATH_MAX terminates the program.
X *
X * See also the section "Pictures" in RTF specification.
X */
Xvoid
Xembedpng(char *field)
X{
X    char *p1,*p2,fn[PATH_MAX];
X    unsigned char buf[256];
X    FILE *fp;
X    size_t l,i,nret;
X    png_structp png_ptr;
X    png_infop info_ptr,end_info;
X    png_uint_32 width,height;
X
X    /* Find the opening and closing quote of the file name. */
X    p1=strcasestr(field,"INCLUDEPICTURE");
X    if(p1!=NULL)
X        p1=strchr(p1+14,'"'); /* String after "INCLUDEPICTURE" */
X    p2=(p1!=NULL)?strchr(p1+1,'"'):NULL;
X    if(p2==NULL)
X    {
X        warnx("Malformed INCLUDEPICTURE field. Keep untouched.");
X        goto embedpng_exit_1;
X    }
X
X    l=(size_t)(p2-(p1+1)); /* Substantial length of file name */
X    if(l>sizeof(fn)-1)
X    {
X        warnx("*** Buffer Overflow Attack Detected !!! ***");
X        exit(1);
X    }
X    memcpy(fn,p1+1,l);
X    fn[l]=0;
X
X    if(l<4) /* It should be longer than ".png". */
X    {
X        warnx("File name '%s' is too short!",fn);
X        goto embedpng_exit_1;
X    }
X
X    if(strcasecmp(fn+(l-4),".png")!=0)
X    {
X        warnx("File name '%s' has not a suffix '.png'. Keep untouched.",fn);
X        goto embedpng_exit_1;
X    }
X
X    if((fp=fopen(fn,"rb"))==NULL)
X    {
X        warnx("Failed to open '%s'!",fn);
X        goto embedpng_exit_1;
X    }
X
X    /* A file shorter than the 8 byte signature cannot be a PNG. */
X    if (fread(buf,1,8,fp)!=8 || png_sig_cmp(buf,0,8))
X    {
X        warnx("The file '%s' is NOT in PNG format!",fn);
X        goto embedpng_exit_2;
X    }
X    png_ptr=png_create_read_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
X    if (!png_ptr)
X    {
X        warnx("Unable to create PNG read struct(*png_ptr)!");
X        goto embedpng_exit_2;
X    }
X    info_ptr=png_create_info_struct(png_ptr);
X    if (!info_ptr)
X    {
X        warnx("Unable to create PNG info struct(*info_ptr)!");
X        png_destroy_read_struct(&png_ptr,(png_infopp)NULL,(png_infopp)NULL);
X        goto embedpng_exit_2;
X    }
X    end_info=png_create_info_struct(png_ptr);
X    if(!end_info)
X    {
X        warnx("Unable to create PNG info struct(*end_info)!");
X        png_destroy_read_struct(&png_ptr,&info_ptr,(png_infopp)NULL);
X        goto embedpng_exit_2;
X    }
X    /* libpng reports errors by jumping back here. */
X    if (setjmp(png_jmpbuf(png_ptr)))
X    {
X        warnx("LibPNG crashed!");
X        png_destroy_read_struct(&png_ptr,&info_ptr,&end_info);
X        goto embedpng_exit_2;
X    }
X    /* The signature was consumed above; let libpng start from the top. */
X    rewind(fp);
X    png_init_io(png_ptr,fp);
X    png_read_info(png_ptr,info_ptr);
X    width=png_get_image_width(png_ptr,info_ptr);
X    height=png_get_image_height(png_ptr,info_ptr);
X
X    if(width>1024 || height>768) warnx("Picture is too large!");
X
X    /*
X     * According to Microsoft's RTF specification, \picwN and \pichN are
X     * mandatory for \pict group. Actually, in both Microsoft Word Viewer
X     * and OpenOffice, these two control words take no effect for PNG.
X     */
X    printf("{\\pict\\pngblip\\picscalex100\\picscaley100\\picw%u\\pich%u",
X        (unsigned int)width,(unsigned int)height);
X
X    /* Dump the whole file as hexadecimal text, 64 bytes per line. */
X    rewind(fp);
X    while((nret=fread(buf,1,64,fp))>0)
X    {
X        printf("\n");
X        for(i=0;i<nret;i++)
X            printf("%02x",(unsigned int)((unsigned char)buf[i]));
X    }
X
X    printf("}");
X
X    warnx("'%s' (%ux%u) embedded.",fn,(unsigned int)width,(unsigned int)height);
X
X    png_destroy_read_struct(&png_ptr,&info_ptr,&end_info);
X    fclose(fp);
X    goto embedpng_exit_0;
X
Xembedpng_exit_2:;
X    fclose(fp);
Xembedpng_exit_1:;
X    printf("%s",field); /* Keep link in RTF untouched */
Xembedpng_exit_0:;
X    return;
X}
X
X/*
X * Print the \fcharsetN control word that matches the selected encoding in
X * place of the matched one ("fcharset"), and report the substitution.
X * See also the section "Font Table" in RTF specification.
X */
Xvoid
Xmodifycharset(char *fcharset)
X{
X    char *s;
X
X    switch(encoding)
X    {
X    case ENCODING_GB2312:
X    case ENCODING_GB18030: /* GB18030 is not supported in RTF so far */
X        s="\\fcharset134";
X        break;
X    case ENCODING_BIG5:
X        s="\\fcharset136";
X        break;
X    default:
X        s="\\fcharset1"; /* "Default" */
X        break;
X    }
X
X    printf("%s",s);
X
X    warnx("Charset control word modified: %s -> %s",fcharset,s);
X
X    return;
X}
X
X/*
X * (init|addto|flush)mybuffer maintain buffer to cache RTF stream
X * while fetching book/article information.
X */
Xvoid initmybuffer(void)
X{
X    size_t i;
X
X    mybufferlength=0;
X    for(i=0;i<sizeof(infobuf)/sizeof(infobuf[0]);i++)
X    {
X        infobuf[i].length=0;
X        infobuf[i].text[0]=0;
X    }
X}
X
X/*
X * Append "leng" bytes of "text" to the cache.
X * Returns 0 on success, or -1 without copying anything when the cache
X * would grow beyond MY_BUFFER_LIMIT.
X */
Xint addtomybuffer(char *text, size_t leng)
X{
X    if(mybufferlength+leng>MY_BUFFER_LIMIT) return -1;
X    memcpy(mybuffer+mybufferlength,text,leng);
X    mybufferlength+=leng; /* No terminator '\0' */
X    return 0;
X}
X
X/* Write the cached RTF to yyout and empty the cache. */
Xvoid flushmybuffer(void)
X{
X    fwrite(mybuffer,1,mybufferlength,yyout);
X    mybufferlength=0;
X}
X
X/*
X * Cache the current token.  When the cache is full, stop fetching: emit
X * what has been collected, echo the token, return to the initial start
X * condition and leave the rule action at once (YY_BREAK), which skips the
X * rest of the action.  Not wrapped in do/while(0) for that reason.
X */
X#define ADDTOBUF { \
X        if(addtomybuffer(yytext,yyleng)) \
X        { \
X            haltfetch(); \
X            ECHO; \
X            BEGIN(0); \
X            warnx("Had been fetching book/article information until buffer was full!"); \
X            YY_BREAK; \
X        } \
X    }
X
X
X/*
X * Collect book/article information RTF sequence: append "leng" bytes of
X * "text" to the item pinfobuf points to and keep it '\0'-terminated.
X * pinfobuf must not be NULL.  Text that does not fit is dropped with a
X * warning.
X */
Xvoid collectinfo(char *text, size_t leng)
X{
X    assert(pinfobuf!=NULL);
X    if(pinfobuf->length+leng>=MY_BUFFER_LIMIT) /* Consider terminator '\0' */
X    {
X        warnx("*** Too long text for title or author !!! ***");
X        warnx("*** Buffer Overflow Attack To Be Considered !!! ***");
X        return; /* Information item buffer is full. */
X    }
X    memcpy(pinfobuf->text+pinfobuf->length,text,leng);
X    pinfobuf->length+=leng;
X    pinfobuf->text[pinfobuf->length]=0;
X}
X
X/*
X * Identify a RTF control word.
X * Returns non-zero when the token "text" of length "leng" is exactly the
X * control word "key", optionally followed by one delimiting space, and 0
X * otherwise.  An empty token matches nothing.
X */
Xint identifyctrlword(char *text, size_t leng, char *key)
X{
X    if(leng==0) return 0; /* Do not look at text[-1] */
X
X    if(text[leng-1]==' ')
X    { /* Tailed by a space as delimiter */
X        if(strlen(key)!=leng-1) return 0;
X        return !strncmp(text,key,leng-1);
X    }
X
X    return !strcmp(text,key);
X}
X
X/*
X * Output fetch book/article information.
X * See also the section "Information Group" in RTF specification.
X */
Xvoid outputinfo()
X{
X    time_t t;
X    struct tm *tm;
X    char buf[128];
X
X    printf("{\\info\\uc0");
X
X    printf("{\\title %s}{\\author %s}",
X        infobuf[INFO_TITLE].text,infobuf[INFO_AUTHOR].text);
X
X    /*
X     * Creation time is the current local time.  The group is left out
X     * when the time cannot be broken down or formatted, rather than
X     * printing an undefined buffer.
X     */
X    time(&t);
X    tm=localtime(&t);
X    if(tm!=NULL &&
X        strftime(buf,sizeof(buf),"\\yr%Y\\mo%m\\dy%d\\hr%H\\min%M\\sec%S",tm)>0)
X        printf("{\\creatim%s}",buf);
X
X    printf("}");
X}
X
X/*
X * Finish fetching: report the collected title and author, print the
X * information group and then the cached RTF that followed it.
X */
Xvoid haltfetch()
X{
X    warnx("Title: %s",infobuf[INFO_TITLE].text);
X    warnx("Author: %s",infobuf[INFO_AUTHOR].text);
X    outputinfo();
X    flushmybuffer();
X}
X
X%}
X
X%option noyywrap
X
X%s fetchinfo
X
Xpnglink \{\\field[^{}]*\{[^{}]*INCLUDEPICTURE[^{}]*\".+\"[^{}]*\}\{[^{}]*\}[^{}]*\}
Xsjischarset \\fcharset128
Xstylesheet \{\\stylesheet[ ]?
Xtitlebegin \\pard.{1,25}\\fs49[ ]?
Xauthorbegin \\pard.{1,25}\\fs34[ ]?
Xrtfhexvalue \\\'[0-9A-Fa-f]{2}
Xrtfctrlword \\[a-z]+([-]?[0-9]+)?[ ]?
Xrtfctrlsymbol \\[^a-z]
X
X%%
X
X{pnglink} { /*
X         * Substitute RTF \pict group for RTF field group.
X         * An example generated by Jade/OpenJade:
X         * {\field\flddirty{\*\fldinst INCLUDEPICTURE "sockets/layers.png" }{\fldrslt }}
X         */
X        if(embedpng_enable) embedpng(yytext);
X        else { ECHO; }
X        }
X
X{sjischarset} {
X        /*
X         * Jade/OpenJade mis-mark Chinese as Shift-JIS encoded Japanese.
X         * This may cause RTF viewer to display Chinese with Japanese font.
X         */
X        if(encoding!=ENCODING_UNKNOWN) modifycharset(yytext);
X        else { ECHO; }
X        }
X
X{stylesheet} { /* Insert book/article information just before style sheet. */
X        if(fetchinfo_enable)
X        { /* Begin fetching book/article information. */
X            initmybuffer();
X            BEGIN(fetchinfo);
X            fetchinfo_enable=0; /* FALSE, one-off */
X            ADDTOBUF;
X        }
X        else
X        {
X            ECHO;
X        }
X        }
X
X<fetchinfo>{titlebegin} { /* Beginning of title, hacked by font size. */
X        ADDTOBUF;
X        pinfobuf=&(infobuf[INFO_TITLE]);
X        if(pinfobuf->length>0) collectinfo(", ",2); /* Duplicated */
X        }
X
X<fetchinfo>{authorbegin} { /* Beginning of author, hacked by font size. 
*/
X        ADDTOBUF;
X        pinfobuf=&(infobuf[INFO_AUTHOR]);
X        if(pinfobuf->length>0) collectinfo(", ",2); /* Duplicated */
X        }
X
X<fetchinfo>{rtfhexvalue} { /* A hexadecimal value, ignore. */
X        ADDTOBUF;
X        }
X
X<fetchinfo>\\~ { /* Nonbreaking space, a control symbol, collect */
X        ADDTOBUF;
X        if(pinfobuf!=NULL) collectinfo(" ",1);
X        }
X
X<fetchinfo>\\[-_] { /* Optional/nonbreaking hyphen, a control symbol, collect */
X        ADDTOBUF;
X        if(pinfobuf!=NULL) collectinfo("-",1);
X        }
X
X<fetchinfo>{rtfctrlsymbol} { /* Other control symbols, ignore */
X        ADDTOBUF;
X        }
X
X<fetchinfo>{rtfctrlword} { /* Control word */
X        ADDTOBUF;
X
X        if(identifyctrlword(yytext,yyleng,"\\keepn"))
X        { /* End of title or author, actually a hack */
X            pinfobuf=NULL;
X        }
X        else if(yytext[0]=='\\' && yytext[1]=='u' &&
X            ((yytext[2]>='0' && yytext[2]<='9') || yytext[2]=='-') )
X        { /* Unicode Character, collect */
X            if(pinfobuf!=NULL)
X            {
X                collectinfo(yytext,yyleng);
X                if(yytext[yyleng-1]!=' ') collectinfo(" ",1);
X            }
X        }
X        else if(identifyctrlword(yytext,yyleng,"\\page"))
X        { /* Accomplished !!! */
X            haltfetch();
X            BEGIN(0);
X        }
X        }
X
X<fetchinfo>[\n{}] { /* Ignore */
X        ADDTOBUF;
X        }
X
X<fetchinfo>. { /* Collect */
X        ADDTOBUF;
X        if(pinfobuf!=NULL) collectinfo(yytext,yyleng);
X        }
X
X%%
X
X/* Print the command line synopsis to stderr. */
Xvoid printusage()
X{
X    fprintf(stderr, "Usage: fixrtf [-e encoding] [-i] [-p] < inputfile > outputfile\n"
X        " Fix RTF file generated by Jade/OpenJade.\n"
X        "Options:\n"
X        " -e encoding\n"
X        " Specify encoding to do specific fixing. (GB2312|BIG5)\n"
X        " -i\n"
X        " Fill RTF file information, such as title and author,\n"
X        " hacked from RTF file generated by Jade/OpenJade.\n"
X        " -p\n"
X        " Embed linked PNG images into RTF file.\n"
X        );
X}
X
X/*
X * Parse the options and filter standard input to standard output.
X *
X * Returns 1 after printing the usage text when no argument or an unknown
X * option is given, and 0 otherwise.  An encoding name that is not
X * recognized is reported and otherwise ignored, so the remaining fixes
X * are still applied.
X */
Xint
Xmain(int argc, char *argv[])
X{
X    int ch;
X
X    if(argc<=1)
X    {
X        warnx("You should indicate at least one kind of fixing.");
X        printusage();
X        return 1;
X    }
X
X    while ((ch = getopt(argc, argv, "e:ip")) != -1)
X    {
X        switch (ch)
X        {
X        case 'e':
X            if(strcasecmp(optarg,"GB2312")==0 ||
X                strcasecmp(optarg,"GBK")==0)
X            {
X                encoding=ENCODING_GB2312;
X            }
X            else if(strcasecmp(optarg,"GB18030")==0)
X            {
X                encoding=ENCODING_GB18030;
X            }
X            else if(strcasecmp(optarg,"BIG5")==0)
X            {
X                encoding=ENCODING_BIG5;
X            }
X            else
X            {
X                warnx("Unknown encoding '%s', no encoding-specific fixing is done for it.",optarg);
X            }
X            break;
X        case 'i':
X            fetchinfo_enable=1; /* One-off */
X            break;
X        case 'p':
X            embedpng_enable=1;
X            break;
X        default:
X            printusage();
X            return 1;
X            break;
X        }
X    }
X
X    yylex();
X
X    /*
X     * The cache is only non-empty while title/author fetching is still in
X     * progress.  If the input ended before "\page" was seen, emit the
X     * information group and the cached RTF instead of losing them.
X     */
X    if(mybufferlength>0)
X        haltfetch();
X
X    return 0;
X}
END-of-src/fixrtf/fixrtf.l
exit
--- zh-docproj.shar ends here ---
>Release-Note:
>Audit-Trail:
>Unformatted:
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200602251905.k1PJ5OhG056456>