Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 26 Feb 2006 03:05:24 +0800 (CST)
From:      Xin LI <delphij@FreeBSD.org>
To:        FreeBSD-gnats-submit@FreeBSD.org
Cc:        doceng@FreeBSD.org, intron@intron.ac, hrs@FreeBSD.org, murray@FreeBSD.org, portmgr@FreeBSD.org
Subject:   ports/93836: [NEW PORT] chinese/zh-docproj
Message-ID:  <200602251905.k1PJ5OhG056456@tarsier.delphij.net>
Resent-Message-ID: <200602251910.k1PJA45x010670@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help

>Number:         93836
>Category:       ports
>Synopsis:       [NEW PORT] chinese/zh-docproj
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-ports-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Sat Feb 25 19:10:04 GMT 2006
>Closed-Date:
>Last-Modified:
>Originator:     Xin LI
>Release:        FreeBSD 6.1-PRERELEASE i386
>Organization:
The FreeBSD Simplified Chinese Project
>Environment:
System: FreeBSD tarsier.delphij.net 6.1-PRERELEASE FreeBSD 6.1-PRERELEASE #9: Sat Feb 25 23:49:38 CST 2006 delphij@tarsier.delphij.net:/usr/obj/usr/src/sys/TARSIER i386


>Description:
	zh-docproj is a set of supporting tools used to generate
correct PDFs for East Asian languages.  We have used these tools
to help generate PDFs for the FreeBSD Chinese Project.

	The current snapshot of zh-docproj supports the GB2312,
GBK, BIG5, EUCJP, EUCKR and UTF-8 encodings.

	The code was taken from the FreeBSD Simplified Chinese
Project CVS, and was mostly written by intron <intron at intron dot ac>.

	I would be happy if portmgr@ would approve adding this
port to the tree before the final release, which would make it
possible to correctly build PDFs of the documentation translated
into East Asian languages.

>How-To-Repeat:
>Fix:


--- zh-docproj.shar begins here ---
# This is a shell archive.  Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file".  Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
#	Makefile
#	pkg-descr
#	pkg-plist
#	src
#	src/Makefile
#	src/cjktexsty
#	src/cjktexsty/cjktexsty.l
#	src/cjktexsty/Makefile
#	src/fixrtf
#	src/fixrtf/Makefile
#	src/fixrtf/fixrtf.l
#
echo x - Makefile
sed 's/^X//' >Makefile << 'END-of-Makefile'
X# New ports collection makefile for:	zh-docproj
X# Date created:		26 Feb 2006
X# Whom:			Xin LI <delphij@FreeBSD.org>
X#
X# $FreeBSD$
X#
X# This port is self contained in the src directory.
X#
X
XPORTNAME=	docproj
XPORTVERSION=	0.1.20060226
XCATEGORIES=	chinese
XMASTER_SITES=	# none
XPKGNAMEPREFIX=	zh-
XDISTFILES=	# none
X
X# Updates from intron@intron.ac should also be considered
X# as maintainer updates.
XMAINTAINER=	delphij@FreeBSD.org
XCOMMENT=	Supportive tools for Chinese docproj build
X
XLIB_DEPENDS=	iconv:${PORTSDIR}/converters/libiconv \
X		png.5:${PORTSDIR}/graphics/png
X
XWRKSRC=		${WRKDIR}/src
X
XSRC=		${.CURDIR}/src
X
Xdo-fetch:
X	@${DO_NADA}
X
Xpre-patch:
X	@${CP} -R ${SRC} ${WRKDIR}
X
X.include <bsd.port.mk>
END-of-Makefile
echo x - pkg-descr
sed 's/^X//' >pkg-descr << 'END-of-pkg-descr'
Xzh-docproj is a set of utilities used to build docproj PDFs.
XThese utilities are maintained by the FreeBSD Simplified Chinese Project.
X
XWWW: http://www.freebsd.org.cn
END-of-pkg-descr
echo x - pkg-plist
sed 's/^X//' >pkg-plist << 'END-of-pkg-plist'
Xbin/cjktexsty
Xbin/fixrtf
END-of-pkg-plist
echo c - src
mkdir -p src > /dev/null 2>&1
echo x - src/Makefile
sed 's/^X//' >src/Makefile << 'END-of-src/Makefile'
X# $FreeBSD$
X
XSUBDIR=	cjktexsty fixrtf
X
X.include <bsd.subdir.mk>
END-of-src/Makefile
echo c - src/cjktexsty
mkdir -p src/cjktexsty > /dev/null 2>&1
echo x - src/cjktexsty/cjktexsty.l
sed 's/^X//' >src/cjktexsty/cjktexsty.l << 'END-of-src/cjktexsty/cjktexsty.l'
X%{
X/*-
X * Copyright (c) 2005, 2006 intron <intron@intron.ac>.  All rights reserved.
X * Copyright (c) 2005, 2006 The FreeBSD Simplified Chinese Project.
X * All rights reserved.
X *
X * This code is derived from software contributed to The FreeBSD Simplified
X * Chinese Project by intron.
X *
X * Redistribution and use in source and binary forms, with or without
X * modification, are permitted provided that the following conditions
X * are met:
X * 1. Redistributions of source code must retain the above copyright
X *    notice, this list of conditions and the following disclaimer.
X * 2. Redistributions in binary form must reproduce the above copyright
X *    notice, this list of conditions and the following disclaimer in the
X *    documentation and/or other materials provided with the distribution.
X *
X * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
X * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
X * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
X * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
X * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
X * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
X * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
X * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
X * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
X * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
X * SUCH DAMAGE.
X *
X *	CNPROJ: doc/zh_CN.GB2312/share/mk/cjktexsty.lex,v 1.1.1000.40 2006/02/19 20:32:32 intron Exp
X */
X
X#include <sys/cdefs.h>
X__FBSDID("$FreeBSD$");
X
X#include <err.h>
X#include <stdio.h>
X#include <string.h>
X#include <unistd.h>
X#include <iconv.h>
X
Xchar texencoding[128]="",*cjkencoding=NULL,cjkfont[128]="";
Xiconv_t iconvhandle;
Xint ccmap_enable=0;
X
Xvoid
Xerrexit(void)
X{
X
X	errx(1, "Error: line %d", yylineno);
X}
X
Xvoid
Xtranscode(char *ch)
X{
X	char *pchar,*pout,input[16],output[128];
X	const char *pin;
X	int c;
X	size_t lin,lout;
X	size_t outlen;
X
X	pchar=strstr(ch,"{");
X	if(pchar==NULL)
X	    errexit();
X	if(sscanf(pchar+1,"%d",&c)!=1)
X	    errexit();
X
X	/* UCS-4 big endian, including not only Basic Multilingual Plane */
X	input[0]=(c&0xff000000)>>24;
X	input[1]=(c&0xff0000)>>16;
X	input[2]=(c&0xff00)>>8;
X	input[3]=(c&0xff);
X	pin=input;
X	lin=4;
X
X	pout=output;
X	lout=sizeof(output);
X
X	iconv(iconvhandle,&pin,&lin,&pout,&lout);
X
X	if(lin!=0) {
X	    switch(c) {
X	    case 8212: strcpy(output,"\\ensuremath{-}"); break;
X	    case 8226: strcpy(output,"\\ensuremath{\\bullet}"); break;
X	    case 8482: strcpy(output,"\\ensuremath{^{\\mathrm{TM}}}"); break;
X	    case 10122: strcpy(output,"{\\large\\ding{202}}"); break;
X	    case 10123: strcpy(output,"{\\large\\ding{203}}"); break;
X	    case 10124: strcpy(output,"{\\large\\ding{204}}"); break;
X	    case 10125: strcpy(output,"{\\large\\ding{205}}"); break;
X	    case 10126: strcpy(output,"{\\large\\ding{206}}"); break;
X	    case 10127: strcpy(output,"{\\large\\ding{207}}"); break;
X	    case 10128: strcpy(output,"{\\large\\ding{208}}"); break;
X	    case 10129: strcpy(output,"{\\large\\ding{209}}"); break;
X	    case 10130: strcpy(output,"{\\large\\ding{210}}"); break;
X	    case 10131: strcpy(output,"{\\large\\ding{211}}"); break;
X	    case 10132: strcpy(output,"\\ensuremath{\\rightarrow}"); break;
X	    case 65533: strcpy(output,"{\\large\\ding{96}}"); break;
X	    default:
X		warnx("Unable to find a substitute for UNICODE character &#%d;", c);
X		strcpy(output,"??");
X		break;
X	    }
X	} else {
X		outlen=sizeof(output)-lout;
X		output[outlen]=0;
X
X		if(outlen==2 && strcspn(output,"\\$&%#@{}^_~\x80")!=outlen)
X		{ /* TeX special character */
X			sprintf(output,"\\CJKchar{%u}{%u}",
X				(unsigned int)(unsigned char)output[0],
X				(unsigned int)(unsigned char)output[1]
X				);
X		}
X	}
X
X	printf("%s",output);
X}
X
X%}
X
X%option yylineno
X%option noyywrap
X
Xfotbegin	\\FOT\{[^}]*\}
Xfotend		\\endFOT\{[^}]*\}
Xcjk		\\Character\{[0-9]{1,5}\}
X
X%%
X
X{fotbegin}	{
X			/*
X			 * A confusing but practical structure:
X			 *
X			 * \usepackage{CJK}
X			 *     \begin{CJK*}{GB}{song}
X			 *         \FOT{3}
X			 *
X			 *             ...
X			 *
X			 *     \end{CJK*}
X			 *         \endFOT{}
X			 *
X			 * The macro call \begin{CJK*} must be put before
X			 * \FOT, or generated PDF will include many "@".
X			 */
X			printf("\\usepackage{textcomp}\n");
X			printf("\\usepackage{pifont}\n");
X			printf("\\usepackage{wasysym}\n");
X			printf("\\usepackage{CJK}\n");
X			if(ccmap_enable) printf("\\usepackage{ccmap}\n");
X			printf("\\hypersetup{CJKbookmarks=true,hypertex,pdfauthor={FreeBSD Documentation Project}}\n");
X			printf("\\begin{CJK*}{%s}{%s}\n%s\n",cjkencoding,cjkfont,yytext);
X		}
X{fotend}	{
X			/*
X			 * \FOT does NOT include \begin{document},
X			 * while \endFOT includes \end{document} explicitly.
X			 * Thus, \endFOT should NOT be put between
X			 * \begin{CJK*} and \end{CJK*},
X			 * whether there is a \FOT between them or not.
X			 */
X			printf("\n\\end{CJK*}%s\n",yytext);
X		}
X{cjk}		{ transcode(yytext); }
X
X[\xA0]		{ printf("{\\nobreakspace}"); }
X[\xA1]		{ printf("{\\textexclamdown}"); }
X[\xA2]		{ printf("{\\textcent}"); }
X[\xA3]		{ printf("{\\pounds}"); }
X[\xA4]		{ printf("{\\textcurrency}"); }
X[\xA5]		{ printf("{\\textyen}"); }
X[\xA6]		{ printf("{\\textbrokenbar}"); }
X[\xA7]		{ printf("{\\S}"); }
X[\xA8]		{ printf("{\\\"{}}"); }
X[\xA9]		{ printf("{\\copyright}"); }
X[\xAA]		{ printf("{\\textordfeminine}"); }
X[\xAB]		{ printf("\\ensuremath{_{^{\\ll}}}"); }
X[\xAC]		{ printf("\\ensuremath{\\lnot}"); }
X[\xAD]		{ printf("{-}"); }
X[\xAE]		{ printf("{\\textregistered}"); }
X[\xAF]		{ printf("\\ensuremath{^{-}}"); }
X[\xB0]		{ printf("{\\textdegree}"); }
X[\xB1]		{ printf("\\ensuremath{\\pm}"); }
X[\xB2]		{ printf("\\ensuremath{^{2}}"); }
X[\xB3]		{ printf("\\ensuremath{^{3}}"); }
X[\xB4]		{ printf("\\ensuremath{'}"); }
X[\xB5]		{ printf("\\ensuremath{\\mu}"); }
X[\xB6]		{ printf("{\\P}"); }
X[\xB7]		{ printf("{\\ifmmode\\cdot\\else\\textperiodcentered\\fi}"); }
X[\xB8]		{ printf("\\c{}"); }
X[\xB9]		{ printf("\\ensuremath{^{1}}"); }
X[\xBA]		{ printf("{\\textordmasculine}"); }
X[\xBB]		{ printf("\\ensuremath{_{^{\\gg}}}"); }
X[\xBC]		{ printf("{\\textonequarter}"); }
X[\xBD]		{ printf("{\\textonehalf}"); }
X[\xBE]		{ printf("{\\textthreequarters}"); }
X[\xBF]		{ printf("{\\textquestiondown}"); }
X[\xC0]		{ printf("\\ensuremath{\\grave{\\mathrm{A}}}"); }
X[\xC1]		{ printf("\\ensuremath{\\acute{\\mathrm{A}}}"); }
X[\xC2]		{ printf("{\\^A}"); }
X[\xC3]		{ printf("{\\~A}"); }
X[\xC4]		{ printf("{\\\"A}"); }
X[\xC5]		{ printf("{\\AA}"); }
X[\xC6]		{ printf("{\\AE}"); }
X[\xC7]		{ printf("{\\c C}"); }
X[\xC8]		{ printf("\\ensuremath{\\grave{\\mathrm{E}}}"); }
X[\xC9]		{ printf("\\ensuremath{\\acute{\\mathrm{E}}}"); }
X[\xCA]		{ printf("{\\^E}"); }
X[\xCB]		{ printf("{\\\"E}"); }
X[\xCC]		{ printf("\\ensuremath{\\grave{\\mathrm{I}}}"); }
X[\xCD]		{ printf("\\ensuremath{\\acute{\\mathrm{I}}}"); }
X[\xCE]		{ printf("{\\^I}"); }
X[\xCF]		{ printf("{\\\"I}"); }
X[\xD0]		{ printf("{\\DH}"); }
X[\xD1]		{ printf("{\\~N}"); }
X[\xD2]		{ printf("\\ensuremath{\\grave{\\mathrm{O}}}"); }
X[\xD3]		{ printf("\\ensuremath{\\acute{\\mathrm{O}}}"); }
X[\xD4]		{ printf("{\\^O}"); }
X[\xD5]		{ printf("{\\~O}"); }
X[\xD6]		{ printf("{\\\"O}"); }
X[\xD7]		{ printf("\\ensuremath{\\times}"); }
X[\xD8]		{ printf("{\\O}"); }
X[\xD9]		{ printf("\\ensuremath{\\grave{\\mathrm{U}}}"); }
X[\xDA]		{ printf("\\ensuremath{\\acute{\\mathrm{U}}}"); }
X[\xDB]		{ printf("{\\^U}"); }
X[\xDC]		{ printf("{\\\"U}"); }
X[\xDD]		{ printf("\\ensuremath{\\acute{\\mathrm{Y}}}"); }
X[\xDE]		{ printf("{\\Thorn}"); }
X[\xDF]		{ printf("{\\ss}"); }
X[\xE0]		{ printf("\\ensuremath{\\grave{\\mathrm{a}}}"); }
X[\xE1]		{ printf("\\ensuremath{\\acute{\\mathrm{a}}}"); }
X[\xE2]		{ printf("{\\^a}"); }
X[\xE3]		{ printf("{\\~a}"); }
X[\xE4]		{ printf("{\\\"a}"); }
X[\xE5]		{ printf("{\\aa}"); }
X[\xE6]		{ printf("{\\ae}"); }
X[\xE7]		{ printf("{\\c c}"); }
X[\xE8]		{ printf("\\ensuremath{\\grave{\\mathrm{e}}}"); }
X[\xE9]		{ printf("\\ensuremath{\\acute{\\mathrm{e}}}"); }
X[\xEA]		{ printf("{\\^e}"); }
X[\xEB]		{ printf("{\\\"e}"); }
X[\xEC]		{ printf("\\ensuremath{\\grave{\\mathrm{\\i}}}"); }
X[\xED]		{ printf("\\ensuremath{\\acute{\\mathrm{\\i}}}"); }
X[\xEE]		{ printf("{\\^\\i}"); }
X[\xEF]		{ printf("{\\\"\\i}"); }
X[\xF0]		{ printf("{\\dh}"); }
X[\xF1]		{ printf("{\\~n}"); }
X[\xF2]		{ printf("\\ensuremath{\\grave{\\mathrm{o}}}"); }
X[\xF3]		{ printf("\\ensuremath{\\acute{\\mathrm{o}}}"); }
X[\xF4]		{ printf("{\\^o}"); }
X[\xF5]		{ printf("{\\~o}"); }
X[\xF6]		{ printf("{\\\"o}"); }
X[\xF7]		{ printf("\\ensuremath{\\div}"); }
X[\xF8]		{ printf("{\\o}"); }
X[\xF9]		{ printf("\\ensuremath{\\grave{\\mathrm{u}}}"); }
X[\xFA]		{ printf("\\ensuremath{\\acute{\\mathrm{u}}}"); }
X[\xFB]		{ printf("{\\^u}"); }
X[\xFC]		{ printf("{\\\"u}"); }
X[\xFD]		{ printf("\\ensuremath{\\acute{\\mathrm{y}}}"); }
X[\xFE]		{ printf("{\\thorn}"); }
X[\xFF]		{ printf("{\\\"y}"); }
X
X[\xa0-\xff]	{
X		  warnx("Unable to find a substitute for ISO8859-1 character \\x%X",
X			(unsigned int)(*((unsigned char *)yytext)));
X		  printf("?");
X		}
X
X%%
X
Xvoid printusage()
X{
X	fprintf(stderr,	"Usage: cjktexsty [ -c ] -e encoding -f fontname\n"
X			"      Convert TeX source including \\Character{xxxxx} generated by\n"
X			"      Jade/OpenJade into what CJK-LaTeX can process.\n"
X			"     \n"
X			"NOTE: Jade/OpenJade supports EUC-JP natively. Thus, this tool SHOULD NOT be\n"
X			"      used in this case. This tool treats all bytes larger than 0xa0 as\n"
X			"      ISO 8859-1 characters, and converts \\Character{xxxxx} into encoding\n"
X			"      that CJK-LaTeX can process.\n"
X			"      \n"
X			"Options:\n"
X			"     -c\n"
X			"          Use ccmap.sty for PDFTeX to generate text-copyable CJK PDF.\n"
X			"          The package ccmap.sty is written by Wenchang Sun and Linbo Zhang.\n"
X			"          See also ftp://ftp.cc.ac.cn/pub/cct/ for details.\n"
X			"     -e encoding\n"
X			"          Specify TeX source encoding for CJK-LaTeX.\n"
X			"     -f fontname\n"
X			"          Specify font name in CJK macro call, such as\n"
X			"          \\begin{CJK*}{encoding}{font}.\n"
X			"      \n"
X			"CJK-LaTeX supported combinations by default:\n"
X			"     <TeX source encoding>  <CJK encoding name>  <CJK font name>\n"
X			"     ------------------------------------------------------------\n"
X			"            GB2312                   GB                 song\n"
X			"            GBK                      GBK                song\n"
X			"            BIG5                     Bg5                bsmi\n"
X			"            EUCJP                    JIS                min\n"
X			"            EUCKR                    KS                     \n"
X			"            UTF-8                    UTF8               song\n"
X		);
X}
X
Xint
Xmain(int argc, char *argv[])
X{
X	int ch;
X
X	while ((ch = getopt(argc, argv, "ce:f:")) != -1)
X	{
X		switch (ch)
X		{
X		case 'c':
X			ccmap_enable=1;
X			break;
X		case 'e':
X			if(strcmp(optarg,"GB2312")==0) cjkencoding="GB";
X			else if(strcmp(optarg,"GBK")==0) cjkencoding="GBK";
X			else if(strcmp(optarg,"GB18030")==0) cjkencoding="GBK"; /* Not supported by CJK yet */
X			else if(strcmp(optarg,"BIG5")==0) cjkencoding="Bg5";
X			else if(strcmp(optarg,"EUCJP")==0) cjkencoding="JIS";
X			else if(strcmp(optarg,"EUCKR")==0) cjkencoding="KS";
X			else if(strcmp(optarg,"UTF-8")==0) cjkencoding="UTF8";
X			else cjkencoding=NULL;
X			if(cjkencoding!=NULL) strlcpy(texencoding,optarg,sizeof(texencoding));
X			break;
X		case 'f':
X			strlcpy(cjkfont,optarg,sizeof(cjkfont));
X			break;
X		default:
X			printusage();
X			return 1;
X			break;
X		}
X	}
X
X	if(cjkencoding==NULL)
X	{
X		printusage();
X		return 1;
X	}
X
X	iconvhandle=iconv_open(texencoding,"UCS-4BE");
X	yylex();
X	iconv_close(iconvhandle);
X	return 0;
X}
END-of-src/cjktexsty/cjktexsty.l
echo x - src/cjktexsty/Makefile
sed 's/^X//' >src/cjktexsty/Makefile << 'END-of-src/cjktexsty/Makefile'
X# $FreeBSD$
X
XPROG=	cjktexsty
XSRCS=	cjktexsty.l
X
XPREFIX?=	/usr/local
XBINDIR=	${PREFIX}/bin
XCFLAGS+=-I${PREFIX}/include
XLDADD=	-L${PREFIX}/lib -liconv
XNO_MAN=
XNOMAN=
X
X.include <bsd.prog.mk>
END-of-src/cjktexsty/Makefile
echo c - src/fixrtf
mkdir -p src/fixrtf > /dev/null 2>&1
echo x - src/fixrtf/Makefile
sed 's/^X//' >src/fixrtf/Makefile << 'END-of-src/fixrtf/Makefile'
X# $FreeBSD$
X
XPROG=	fixrtf
XSRCS=	fixrtf.l
X
XPREFIX?=	/usr/local
XBINDIR=	${PREFIX}/bin
XCFLAGS+=-I${PREFIX}/include
XLDADD=	-L${PREFIX}/lib -lpng
XNO_MAN=
XNOMAN=
X
X.include <bsd.prog.mk>
END-of-src/fixrtf/Makefile
echo x - src/fixrtf/fixrtf.l
sed 's/^X//' >src/fixrtf/fixrtf.l << 'END-of-src/fixrtf/fixrtf.l'
X%{
X/*-
X * Copyright (c) 2005, 2006 intron <intron@intron.ac>.  All rights reserved.
X * Copyright (c) 2005, 2006 The FreeBSD Simplified Chinese Project.
X * All rights reserved.
X *
X * This code is derived from software contributed to The FreeBSD Simplified
X * Chinese Project by intron.
X *
X * Redistribution and use in source and binary forms, with or without
X * modification, are permitted provided that the following conditions
X * are met:
X * 1. Redistributions of source code must retain the above copyright
X *    notice, this list of conditions and the following disclaimer.
X * 2. Redistributions in binary form must reproduce the above copyright
X *    notice, this list of conditions and the following disclaimer in the
X *    documentation and/or other materials provided with the distribution.
X *
X * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
X * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
X * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
X * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
X * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
X * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
X * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
X * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
X * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
X * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
X * SUCH DAMAGE.
X *
X * From CNPROJ: doc/zh_CN.GB2312/share/mk/fixrtf.lex,v 1.1.1000.20 2006/02/19 10:21:40 intron
X */
X
X#include <sys/cdefs.h>
X__FBSDID("$FreeBSD$");
X
X#include <err.h>
X#include <assert.h>
X#include <stdio.h>
X#include <string.h>
X#include <sys/param.h>
X#include <stdlib.h>
X#include <unistd.h>
X#include <time.h>
X#include <png.h>
X
X/*
X * This program is used to fix RTF:
X * 1. Embed PNGs into RTF.
X * 2. Embed FreeBSD-specific information into RTF, such as organization name,
X *    building time. But unfortunately, so far only Microsoft Word can read
X *    them. In contrast, Microsoft Word Viewer and OpenOffice even cannot read
X *    this kind of information from RTF created by Microsoft Word and
X *    OpenOffice. (Option: -i)
X * 3. Do some locale-specific fixing. (Option: -e <encoding>)
X *
X * See also Rich Text Format (RTF) Specification:
X * 1. Version 1.8 (Microsoft Word 2003)
X *    http://www.microsoft.com/downloads/details.aspx?familyid=ac57de32-17f0-4b46-9e4e-467ef9bc5540&displaylang=en
X * 2. Version 1.7 (Microsoft Word 2002)
X *    http://support.microsoft.com/kb/q86999/
X * 3. Version 1.6 (Microsoft Word 2000)
X *    http://msdn.microsoft.com/library/en-us/dnrtfspec/html/rtfspec.asp
X */
X
X
Xint embedpng_enable=0;
X
X/* See also http://msdn.microsoft.com/library/en-us/intl/unicode_81rn.asp */
X#define	ENCODING_UNKNOWN	0
X#define	ENCODING_GB2312		936
X#define	ENCODING_GB18030	54936
X#define	ENCODING_BIG5		950
X
Xint encoding=ENCODING_UNKNOWN;
X
X
Xint fetchinfo_enable=0; /* FALSE */
X
X
X#define MY_BUFFER_SIZE		3072
X#define MY_BUFFER_LIMIT		2048
X
X/* MY_BUFFER_LIMIT is smaller than MY_BUFFER_SIZE, leaving some headroom. */
X
X/*
X * "mybuffer" is used to cache RTF stream 
X * while fetching book/article information.
X */
Xsize_t mybufferlength=0;
Xchar mybuffer[MY_BUFFER_SIZE];
X
X
X#define	INFO_TITLE	0
X#define	INFO_AUTHOR	1
X
X/* To store fetched book/article information */
Xstruct
X{
X	size_t length;
X	char text[MY_BUFFER_SIZE];
X} *pinfobuf=NULL,infobuf[]=
X{
X	{0,""},
X	{0,""}
X};
X
X/*
X * Replace an RTF INCLUDEPICTURE field group with an embedded \pict group.
X *
X * "field" is the complete text of the field group matched by the scanner.
X * The quoted file name that follows INCLUDEPICTURE is extracted; when it
X * names a readable PNG file, the whole file is written to standard output
X * as a hexadecimal \pngblip picture.  In every other case (malformed
X * field, name shorter than 4 characters, no ".png" suffix, unreadable or
X * truncated file, bad signature, libpng failure) the field is printed
X * unchanged so that the link is kept.  A file name that does not fit into
X * PATH_MAX bytes terminates the program with exit status 1.
X *
X * See also the section "Pictures" in RTF specification.
X */
Xvoid
Xembedpng(char *field)
X{
X	char *p1,*p2,fn[PATH_MAX];
X	unsigned char buf[256];
X	FILE *fp;
X	size_t l,i,nret;
X	png_structp png_ptr;
X	png_infop info_ptr,end_info;
X	png_uint_32 width,height;
X
X	/* Locate the quoted file name; pass a malformed field through. */
X	p1=strcasestr(field,"INCLUDEPICTURE");
X	if(p1==NULL) goto embedpng_exit_1;
X	p1=strchr(p1+14,'"'); /* String after "INCLUDEPICTURE" */
X	if(p1==NULL) goto embedpng_exit_1;
X	p2=strchr(p1+1,'"');
X	if(p2==NULL) goto embedpng_exit_1;
X	l=(size_t)(p2-(p1+1)); /* Substantial length of file name */
X	if(l>sizeof(fn)-1)
X	{
X		warnx("*** Buffer Overflow Attack Detected !!! ***");
X		exit(1);
X	}
X	memcpy(fn,p1+1,l);
X	fn[l]=0;
X
X	if(l<4) /* It should be longer than ".png". */
X	{
X		warnx("File name '%s' is too short!",fn);
X		goto embedpng_exit_1;
X	}
X
X	if(strcasecmp(fn+(l-4),".png")!=0)
X	{
X		warnx("File name '%s' has not a suffix '.png'. Keep untouched.",fn);
X		goto embedpng_exit_1;
X	}
X
X	if((fp=fopen(fn,"rb"))==NULL)
X	{
X		warnx("Failed to open '%s'!",fn);
X		goto embedpng_exit_1;
X	}
X
X	/*
X	 * A file shorter than the 8-byte signature cannot be a PNG; without
X	 * the length check the comparison would read uninitialised bytes.
X	 */
X	if (fread(buf,1,8,fp)!=8 || png_sig_cmp(buf,0,8))
X	{
X		warnx("The file '%s' is NOT in PNG format!",fn);
X		goto embedpng_exit_2;
X	}
X	png_ptr=png_create_read_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
X	if (!png_ptr)
X	{
X		warnx("Unable to create PNG read struct(*png_ptr)!");
X		goto embedpng_exit_2;
X	}
X	info_ptr=png_create_info_struct(png_ptr);
X	if (!info_ptr)
X	{
X		warnx("Unable to create PNG info struct(*info_ptr)!");
X		png_destroy_read_struct(&png_ptr,(png_infopp)NULL,(png_infopp)NULL);
X		goto embedpng_exit_2;
X	}
X	end_info=png_create_info_struct(png_ptr);
X	if(!end_info)
X	{
X		warnx("Unable to create PNG info struct(*end_info)!");
X		png_destroy_read_struct(&png_ptr,&info_ptr,(png_infopp)NULL);
X		goto embedpng_exit_2;
X	}
X	/* libpng reports errors by jumping back to this point. */
X	if (setjmp(png_jmpbuf(png_ptr)))
X	{
X		warnx("LibPNG crashed!");
X		png_destroy_read_struct(&png_ptr,&info_ptr,&end_info);
X		goto embedpng_exit_2;
X	}
X	/* The signature was consumed above; let libpng start at offset 0. */
X	rewind(fp);
X	png_init_io(png_ptr,fp);
X	png_read_info(png_ptr,info_ptr);
X	width=png_get_image_width(png_ptr,info_ptr);
X	height=png_get_image_height(png_ptr,info_ptr);
X
X	if(width>1024 || height>768) warnx("Picture is too large!");
X
X	/*
X	 * According to Microsoft's RTF specification, \picwN and \pichN is
X	 * mandatory for \pict group. Actually, in both Microsoft Word Viewer
X	 * and OpenOffice, these two control words take no effect for PNG.
X	 */
X	printf("{\\pict\\pngblip\\picscalex100\\picscaley100\\picw%u\\pich%u",
X		(unsigned int)width,(unsigned int)height);
X
X	/* Dump the complete file as hexadecimal, 64 bytes per line. */
X	rewind(fp);
X	while((nret=fread(buf,1,64,fp))>0)
X	{
X		printf("\n");
X		for(i=0;i<nret;i++)
X			printf("%02x",(unsigned int)buf[i]);
X	}
X
X	printf("}");
X
X	warnx("'%s' (%ux%u) embedded.",fn,(unsigned int)width,(unsigned int)height);
X
X	png_destroy_read_struct(&png_ptr,&info_ptr,&end_info);
X	fclose(fp);
X	return;
X
Xembedpng_exit_2:;
X	fclose(fp);
Xembedpng_exit_1:;
X	printf("%s",field); /* Keep link in RTF untouched */
X}
X
X/*
X * Print the \fcharsetN control word that matches the global "encoding"
X * in place of the one found in the input, and report the substitution on
X * standard error.  "fcharset" is the original control word text and is
X * used for the message only.
X *
X * See also the section "Font Table" in RTF specification.
X */
Xvoid
Xmodifycharset(char *fcharset)
X{
X	char *replacement="\\fcharset1"; /* "Default" */
X
X	if(encoding==ENCODING_GB2312 || encoding==ENCODING_GB18030)
X	{
X		/* GB18030 is not supported in RTF so far */
X		replacement="\\fcharset134";
X	}
X	else if(encoding==ENCODING_BIG5)
X	{
X		replacement="\\fcharset136";
X	}
X
X	fputs(replacement,stdout);
X
X	warnx("Charset control word modified: %s -> %s",fcharset,replacement);
X}
X
X/*
X * (init|addto|flush)mybuffer maintain buffer to cache RTF stream
X * while fetching book/article information.
X */
Xvoid initmybuffer()
X{
X	int i;
X
X	mybufferlength=0;
X	for(i=0;i<sizeof(infobuf)/sizeof(infobuf[0]);i++)
X	{
X		infobuf[i].length=0;
X		infobuf[i].text[0]=0;
X	}
X}
X
Xint addtomybuffer(char *text, size_t leng)
X{
X	if(mybufferlength+leng>MY_BUFFER_LIMIT) return -1;
X	/* warnx("_%s_",yytext); */
X	memcpy(mybuffer+mybufferlength,text,leng);
X	mybufferlength+=leng; /* No terminator '\0' */
X	return 0;
X}
X
Xvoid flushmybuffer()
X{
X	fwrite(mybuffer,1,mybufferlength,yyout);
X	mybufferlength=0;
X}
X
X/*
X * ADDTOBUF: cache the current token (yytext/yyleng) with addtomybuffer().
X * When the cache refuses it (non-zero return), fetching is abandoned:
X * haltfetch() is called, the token is echoed, the scanner is switched back
X * with BEGIN(0) and a warning is printed.
X * The macro relies on yytext, yyleng, ECHO, BEGIN and YY_BREAK, so it can
X * only be expanded inside a scanner action.
X * NOTE(review): YY_BREAK presumably skips the remainder of the invoking
X * action -- confirm against the generated scanner.
X */
X#define	ADDTOBUF { \
X	if(addtomybuffer(yytext,yyleng)) \
X	{ \
X		haltfetch(); \
X		ECHO; \
X		BEGIN(0); \
X		warnx("Had been fetching book/article information until buffer was full!"); \
X		YY_BREAK; \
X	} \
X   }
X
X
X/*
X * Append "leng" bytes of "text" to the information item selected by
X * pinfobuf (which must not be NULL) and keep the item '\0'-terminated.
X * When the item would reach MY_BUFFER_LIMIT bytes, nothing is stored and
X * two warnings are printed instead.
X */
Xvoid collectinfo(char *text, size_t leng)
X{
X	size_t used;
X
X	assert(pinfobuf!=NULL);
X	used=pinfobuf->length;
X
X	if(used+leng<MY_BUFFER_LIMIT) /* Room left, terminator '\0' included */
X	{
X		memcpy(&pinfobuf->text[used],text,leng);
X		pinfobuf->text[used+leng]=0;
X		pinfobuf->length=used+leng;
X		return;
X	}
X
X	/* Information item buffer is full. */
X	warnx("*** Too long text for title or author !!! ***");
X	warnx("*** Buffer Overflow Attack To Be Considered !!! ***");
X}
X
X/*
X * Tell whether the token "text" of length "leng" is the RTF control word
X * "key".  A single trailing space in the token is treated as a delimiter
X * and is not compared.  Returns 1 on a match and 0 otherwise.
X */
Xint identifyctrlword(char *text, size_t leng, char *key)
X{
X	size_t wordlen;
X
X	if(text[leng-1]!=' ')
X		return strcmp(text,key)==0;
X
X	/* Tailed by a space as delimiter */
X	wordlen=leng-1;
X	if(strlen(key)!=wordlen) return 0;
X	return strncmp(text,key,wordlen)==0;
X}
X
X/*
X * Print an RTF \info group holding the fetched title and author together
X * with a \creatim group built from the current local time.
X * See also the section "Information Group" in RTF specification.
X */
Xvoid outputinfo()
X{
X	char creatim[128];
X	time_t now=time(NULL);
X
X	strftime(creatim,sizeof(creatim),"\\yr%Y\\mo%m\\dy%d\\hr%H\\min%M\\sec%S",localtime(&now));
X
X	printf("{\\info\\uc0{\\title %s}{\\author %s}{\\creatim%s}}",
X		infobuf[INFO_TITLE].text,infobuf[INFO_AUTHOR].text,creatim);
X}
X
Xvoid haltfetch()
X{
X	warnx("Title: %s",infobuf[INFO_TITLE].text);
X	warnx("Author: %s",infobuf[INFO_AUTHOR].text);
X	outputinfo();
X	flushmybuffer();
X}
X
X%}
X
X%option noyywrap
X
X%s	fetchinfo
X
Xpnglink		\{\\field[^{}]*\{[^{}]*INCLUDEPICTURE[^{}]*\".+\"[^{}]*\}\{[^{}]*\}[^{}]*\}
Xsjischarset	\\fcharset128
Xstylesheet	\{\\stylesheet[ ]?
Xtitlebegin	\\pard.{1,25}\\fs49[ ]?
Xauthorbegin	\\pard.{1,25}\\fs34[ ]?
Xrtfhexvalue	\\\'[0-9A-Fa-f]{2}
Xrtfctrlword	\\[a-z]+([-]?[0-9]+)?[ ]?
Xrtfctrlsymbol	\\[^a-z]
X
X%%
X
X{pnglink}  { /*
X	      * Substitute RTF \pict group for RTF field group.
X	      * An example generated by Jade/OpenJade:
X	      * {\field\flddirty{\*\fldinst INCLUDEPICTURE "sockets/layers.png" }{\fldrslt }}
X	      */
X		if(embedpng_enable) embedpng(yytext);
X		else { ECHO; }
X	   }
X
X{sjischarset}  {
X	      /*
X	       * Jade/OpenJade mis-mark Chinese as Shift-JIS encoded Japanese.
X	       * This may cause RTF viewer to display Chinese with Japanese font.
X	       */
X		if(encoding!=ENCODING_UNKNOWN) modifycharset(yytext);
X		else { ECHO; }
X	   }
X
X{stylesheet}  { /* Insert book/article information just before style sheet. */
X		if(fetchinfo_enable)
X		{ /* Begin fetching book/article information. */
X			initmybuffer();
X			BEGIN(fetchinfo);
X			fetchinfo_enable=0; /* FALSE, one-off */
X			ADDTOBUF;
X		}
X		else
X		{
X			ECHO;
X		}
X	   }
X
X<fetchinfo>{titlebegin}  { /* Beginning of title, hacked by font size. */
X		ADDTOBUF;
X		pinfobuf=&(infobuf[INFO_TITLE]);
X		if(pinfobuf->length>0) collectinfo(", ",2); /* Duplicated */
X	   }
X
X<fetchinfo>{authorbegin}  { /* Beginning of author, hacked by font size. */
X		ADDTOBUF;
X		pinfobuf=&(infobuf[INFO_AUTHOR]);
X		if(pinfobuf->length>0) collectinfo(", ",2); /* Duplicated */
X	   }
X
X<fetchinfo>{rtfhexvalue}  { /* A hexadecimal value, ignore. */
X		ADDTOBUF;
X	   }
X
X<fetchinfo>\\~  { /* Nonbreaking space, a control symbol, collect */
X		ADDTOBUF;
X		if(pinfobuf!=NULL) collectinfo(" ",1);
X	   }
X
X<fetchinfo>\\[-_]  { /* Optional/nonbreaking hyphen, a control symbol, collect */
X		ADDTOBUF;
X		if(pinfobuf!=NULL) collectinfo("-",1);
X	   }
X
X<fetchinfo>{rtfctrlsymbol}  { /* Other control symbols, ignore */
X		ADDTOBUF;
X	   }
X
X<fetchinfo>{rtfctrlword}  { /* Control word */
X		ADDTOBUF;
X
X		if(identifyctrlword(yytext,yyleng,"\\keepn"))
X		{ /* End of title or author, actually a hack */
X			pinfobuf=NULL;
X		}
X		else if(yytext[0]=='\\' && yytext[1]=='u' &&
X			((yytext[2]>='0' && yytext[2]<='9') || yytext[2]=='-') )
X		{ /* Unicode Character, collect */
X			if(pinfobuf!=NULL)
X			{
X				collectinfo(yytext,yyleng);
X				if(yytext[yyleng-1]!=' ') collectinfo(" ",1);
X			}
X		}
X		else if(identifyctrlword(yytext,yyleng,"\\page"))
X		{ /* Accomplished !!!  */
X			haltfetch();
X			BEGIN(0);
X		}
X	   }
X
X<fetchinfo>[\n{}]  {  /* Ignore */
X		ADDTOBUF;
X	   }
X
X<fetchinfo>.  { /* Collect */
X		ADDTOBUF;
X		if(pinfobuf!=NULL) collectinfo(yytext,yyleng);
X	   }
X
X%%
X
Xvoid printusage()
X{
X	fprintf(stderr,	"Usage: fixrtf [-e encoding] [-i] [-p] < inputfile > outputfile\n"
X			"     Fix RTF file generated by Jade/OpenJade.\n"
X			"Options:\n"
X			"       -e encoding\n"
X			"             Specify encoding to do specific fixing. (GB2312|BIG5)\n"
X			"       -i\n"
X			"             Fill RTF file information, such as title and author,\n"
X			"             hacked from RTF file generated by Jade/OpenJade.\n"
X			"       -p\n"
X			"             Embed linked PNG images into RTF file.\n"
X		);
X}
X
Xint
Xmain(int argc, char *argv[])
X{
X	int ch;
X
X	if(argc<=1)
X	{
X		warnx("You should indicate at least one kind of fixing.");
X		printusage();
X		return 1;
X	}
X
X	while ((ch = getopt(argc, argv, "e:ip")) != -1)
X	{
X		switch (ch)
X		{
X		case 'e':
X			if(strcasecmp(optarg,"GB2312")==0 ||
X				strcasecmp(optarg,"GBK")==0)
X			{
X				encoding=ENCODING_GB2312;
X			}
X			else if(strcasecmp(optarg,"GB18030")==0)
X			{
X				encoding=ENCODING_GB18030;
X			}
X			else if(strcasecmp(optarg,"BIG5")==0)
X			{
X				encoding=ENCODING_BIG5;
X			}
X			break;
X		case 'i':
X			fetchinfo_enable=1; /* One-off */
X			break;
X		case 'p':
X			embedpng_enable=1;
X			break;
X		default:
X			printusage();
X			return 1;
X			break;
X		}
X	}
X
X	yylex();
X
X	return 0;
X}
END-of-src/fixrtf/fixrtf.l
exit
--- zh-docproj.shar ends here ---


>Release-Note:
>Audit-Trail:
>Unformatted:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200602251905.k1PJ5OhG056456>