Date: Sun, 17 Oct 2004 11:22:35 +0900 (JST) From: Fumihiko Kimura <jfkimura@yahoo.co.jp> To: FreeBSD-gnats-submit@FreeBSD.org Cc: dinoex@FreeBSD.org Subject: ports/72776: update ports: japanese/webalizer Message-ID: <200410170222.i9H2MZJx081612@sh0.radio.gr.jp> Resent-Message-ID: <200410170230.i9H2UKm5079677@freefall.freebsd.org>
next in thread | raw e-mail | index | archive | help
>Number: 72776 >Category: ports >Synopsis: update ports: japanese/webalizer >Confidential: no >Severity: non-critical >Priority: low >Responsible: freebsd-ports-bugs >State: open >Quarter: >Keywords: >Date-Required: >Class: change-request >Submitter-Id: current-users >Arrival-Date: Sun Oct 17 02:30:20 GMT 2004 >Closed-Date: >Last-Modified: >Originator: Fumihiko Kimura >Release: FreeBSD 4.10-RELEASE i386 >Organization: >Environment: >Description: (Cc MAINTAINER: Dirk Meyer-san) Japanese text can be encoded in several Kanji character codes: JIS, SJIS, EUC and UTF-8. Several of these codes coexist in the "Search Strings" collected by Webalizer. I think most Japanese users currently work around this with Makefile.local. This time I am submitting, via send-pr, the patch written by Dr. URASHIMA Akira that is shown below. The default behavior stays as before; to apply the patch, the knob (WITH_WEBALIZER_CONV) has to be set. (Admittedly, this is a somewhat halfhearted approach.) In addition, I add to sample.conf some settings that should be reasonably useful to Japanese users. >How-To-Repeat: >Fix: === begin cut here === diff -urN webalizer-orig/Makefile webalizer/Makefile --- webalizer-orig/Makefile Sat Apr 5 06:32:28 2003 +++ webalizer/Makefile Thu Oct 14 14:51:52 2004 @@ -9,8 +9,24 @@ MAINTAINER= dinoex@FreeBSD.org +MASTERDIR= ${.CURDIR}/../../www/webalizer WEBALIZER_LANG= japanese -MASTERDIR?= ${.CURDIR}/../../www/webalizer + +# The patch file is written by URASHIMA Akira +# http://tyche.pu-toyama.ac.jp/~a-urasim/webalizer/webalizer-a-urasim_2.patch +OPTIONS= WEBALIZER_CONV "Use character code convert patch" off + +.if defined(WITH_WEBALIZER_CONV) +CONFIGURE_ARGS+= --enable-mininls +CONFIGURE_ENV+= LIBS="-L${LOCALBASE}/lib -liconv" +CFLAGS+= -I${PREFIX}/include +.endif + +post-patch: +.if defined(WITH_WEBALIZER_CONV) + @cd ${WRKSRC} && ${PATCH} < ${.CURDIR}/files/extra-webalizer-a-urasim_2.patch +.endif + @cd ${WRKSRC} && ${PATCH} < ${.CURDIR}/files/extra-ja-webalizer.conf-dist.patch .if exists(${.CURDIR}/Makefile.local) .include "${.CURDIR}/Makefile.local" diff -urN 
webalizer-orig/files/extra-ja-webalizer.conf-dist.patch webalizer/files/extra-ja-webalizer.conf-dist.patch --- webalizer-orig/files/extra-ja-webalizer.conf-dist.patch Thu Jan 1 09:00:00 1970 +++ webalizer/files/extra-ja-webalizer.conf-dist.patch Thu Oct 14 11:49:08 2004 @@ -0,0 +1,67 @@ +--- sample.conf.orig Fri Sep 29 12:51:42 2000 ++++ sample.conf Thu Oct 14 11:48:21 2004 +@@ -107,9 +107,12 @@ + + PageType htm* + PageType cgi ++#PageType shtml + #PageType phtml + #PageType php3 ++#PageType php + #PageType pl ++#PageType rb + + # UseHTTPS should be used if the analysis is being run on a + # secure server, and links to urls should use 'https://' instead +@@ -153,6 +156,7 @@ + # is 80 characters, so use multiple lines if needed. + + #HTMLHead <META NAME="author" CONTENT="The Webalizer"> ++HTMLHead <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=x-euc-jp"> + + # HTMLBody defined the HTML code to be inserted, starting with the + # <BODY> tag. If not specified, the default is shown below. 
If +@@ -393,6 +397,9 @@ + HideURL *.png + HideURL *.PNG + HideURL *.ra ++HideURL *.css ++HideURL *.CSS ++HideURL *.ico + + # Hiding agents is kind of futile + #HideAgent RealPlayer +@@ -412,6 +419,11 @@ + #GroupReferrer excite.com/ Excite + #GroupReferrer infoseek.com/ InfoSeek + #GroupReferrer webcrawler.com/ WebCrawler ++#GroupReferrer yahoo.co.jp/ Yahoo!Japan ++#GroupReferrer google.co.jp/ GoogleJapan ++#GroupReferrer infoseek.co.jp/ InfoSeekJapan ++#GroupReferrer goo.ne.jp/ Goo ++#GroupReferrer msn.co.jp/ MSNJapan + + #GroupUser root Admin users + #GroupUser admin Admin users +@@ -530,6 +542,21 @@ + SearchEngine mamma.com query= + SearchEngine alltheweb.com query= + SearchEngine northernlight.com qr= ++ ++SearchEngine yahoo.co.jp p= ++SearchEngine google.co.jp q= ++SearchEngine infoseek.co.jp qt= ++SearchEngine msn.co.jp q= ++# ocn ++SearchEngine goo.ne.jp MT= ++SearchEngine biglobe.ne.jp q= ++SearchEngine nifty.com Text= ++# so-net odn ++SearchEngine excite.co.jp search= ++SearchEngine livedoor.com q= ++SearchEngine jp.aol.com query= ++#SearchEngine .google. 
q= ++#SearchEngine bulkfeeds.net q= + + # The Dump* keywords allow the dumping of Sites, URL's, Referrers + # User Agents, Usernames and Search strings to seperate tab delimited diff -urN webalizer-orig/files/extra-webalizer-a-urasim_2.patch webalizer/files/extra-webalizer-a-urasim_2.patch --- webalizer-orig/files/extra-webalizer-a-urasim_2.patch Thu Jan 1 09:00:00 1970 +++ webalizer/files/extra-webalizer-a-urasim_2.patch Thu Oct 14 11:32:39 2004 @@ -0,0 +1,241 @@ +--- webalizer.c.a-urasim Wed Apr 17 07:11:31 2002 ++++ webalizer.c Tue Dec 23 23:26:23 2003 +@@ -39,6 +39,7 @@ + #include <sys/utsname.h> + #include <sys/times.h> + #include <zlib.h> ++#include <iconv.h> + + /* ensure getopt */ + #ifdef HAVE_GETOPT_H +@@ -224,6 +225,8 @@ + char *f_cp=f_buf+GZ_BUFSIZE; /* pointer into the buffer */ + int f_end; /* count to end of buffer */ + ++iconv_t cd_from_sjis, cd_from_utf8; ++ + /*********************************************/ + /* MAIN - start here */ + /*********************************************/ +@@ -526,6 +529,9 @@ + + start_time = times(&mytms); + ++ cd_from_sjis = iconv_open("EUC-JP", "Shift_JIS"); ++ cd_from_utf8 = iconv_open("EUC-JP", "UTF-8"); ++ + /*********************************************/ + /* MAIN PROCESS LOOP - read through log file */ + /*********************************************/ +@@ -1345,6 +1351,9 @@ + if (dns_db) close_cache(); + #endif + ++ iconv_close(cd_from_sjis); ++ iconv_close(cd_from_utf8); ++ + /* Whew, all done! Exit with completion status (0) */ + exit(0); + } +@@ -1773,6 +1782,23 @@ + + if (!str) return NULL; /* make sure strings valid */ + ++ while(*cp1){ /* for apache log's escape code. 
*/ ++ if(*cp1 == '\\' && *(cp1+1) == 'x' && ++ isxdigit(*(cp1+2)) && isxdigit(*(cp1+3))){ ++ *cp2 = from_hex(*(cp1+2))*16 + from_hex(*(cp1+3)); ++ if ((*cp2<32)||(*cp2==127)) *cp2='_'; ++ cp1+=4; cp2++; ++ ++ } ++ else if(*cp1 == '\\' && *(cp1+1) == '\\'){ ++ *cp2++='\\'; ++ cp1+=2; ++ } ++ else *cp2++ = *cp1++; ++ } ++ *cp2=*cp1; ++ ++ cp1=cp2=str; + while (*cp1) + { + if (*cp1=='%') /* Found an escape? */ +@@ -1783,7 +1809,7 @@ + if (*cp1) *cp2=from_hex(*cp1++)*16; /* convert hex to an ascii */ + if (*cp1) *cp2+=from_hex(*cp1); /* (hopefully) character */ + if ((*cp2<32)||(*cp2==127)) *cp2='_'; /* make '_' if its bad */ +- if (*cp1) cp2++; cp1++; ++ if (*cp1){ cp2++; cp1++;} /* bug? */ + } + else *cp2++='%'; + } +@@ -1793,6 +1819,116 @@ + return str; /* return the string */ + } + ++int score_eucj(unsigned char *str) ++{ ++ int stat=0; ++ int score=0; ++ int bad=0; ++ if(str==NULL) return -1; ++ ++ for(; *str!=0;str++){ ++ switch(stat){ ++ case 0: ++ if(*str>= 0x20 && *str <= 0x7e) score++; //ASCII ++ else if(*str >= 0xa1 && *str <= 0xfe) stat=1; //KANJI(1) ++ else if(*str == 0x8f); // HOJYO KANJI ++ else if(*str == 0x8e) stat=2; // KANA ++ else if(*str < 0x20); //CTRL ++ else bad=1; ++ break; ++ case 1: ++ if(*str >= 0xa1 && *str <= 0xfe) score += 2; //KANJI(2) ++ else bad=1; ++ stat=0; ++ break; ++ case 2: ++ if(*str >= 0xa1 && *str <= 0xdf); //hankaku <- 0 ++ else bad=1; ++ stat=0; ++ break; ++ } ++ } ++ if(bad != 0) score = -1; ++ return score; ++} ++ ++int score_sjis(unsigned char *str) ++{ ++ int stat=0; ++ int score=0; ++ int bad=0; ++ if(str==NULL) return -1; ++ ++ for(; *str != 0; str++){ ++ switch(stat){ ++ case 0: ++ if(*str>= 0x20 && *str <= 0x7e) score++;//ASCII ++ else if((*str >= 0x81 && *str <= 0x9f) || ++ (*str >= 0xe0 && *str <= 0xfc)) stat=1; //SJIS(1) ++ else if(*str >= 0xa1 && *str <= 0xdf); // KANA ++ else if(*str < 0x20); // CTRL ++ else bad=1; ++ break; ++ case 1: ++ if((*str >= 0x40 && *str <= 0x7e) || ++ (*str >= 0x80 && *str <= 0xfc)) 
score += 2; //SJIS(2) ++ else bad=1; ++ stat=0; ++ break; ++ } ++ } ++ if(bad != 0) score = -1; ++ return score; ++} ++ ++int score_utf8(unsigned char *str) ++{ ++ int stat=0; ++ int score=0; ++ int bad=0; ++ if(str==NULL) return -1; ++ ++ for(; *str != 0; str++){ ++ switch(stat){ ++ case 0: ++ if(*str>= 0x20 && *str <= 0x7e) score++; //ASCII ++ else if(*str >= 0xc0 && *str <= 0xdf) stat=1; //greek etc. ++ else if(*str >= 0xe0 && *str <= 0xef) stat=2; //KANJI etc. ++ else if(*str >= 0xf0 && *str <= 0xf7) stat=4; ++ else if(*str < 0x20); //CTRL ++ else bad=1; ++ break; ++ case 1: ++ if(*str >= 0x80 && *str <= 0xbf) score++; ++ else bad=1; ++ stat=0; ++ break; ++ case 2: ++ if(*str >= 0x80 && *str <= 0xbf) stat=3; //KANJI(2) ++ else {bad=1; stat=0;} ++ break; ++ case 3: ++ if(*str >= 0x80 && *str <= 0xbf) score+=3; //KANJI(3) ++ else bad=1; ++ stat=0; ++ break; ++ case 4: ++ case 5: ++ if(*str >= 0x80 && *str <= 0xbf) stat++; ++ else {bad=1; stat=0;} ++ break; ++ case 6: ++ if(*str >= 0x80 && *str <= 0xbf) score+=4; ++ else bad=1; ++ stat=0; ++ break; ++ } ++ } ++ if(bad != 0) score = -1; ++ return score; ++} ++ ++ + /*********************************************/ + /* SRCH_STRING - get search strings from ref */ + /*********************************************/ +@@ -1804,6 +1940,10 @@ + char srch[80]=""; + unsigned char *cp1, *cp2, *cps; + int sp_flg=0; ++ int sjis, eucj, utf8; ++ char tmpbuf2[BUFSIZE]; ++ size_t inlen, outlen; ++ unsigned char *cp3; + + /* Check if search engine referrer or return */ + if ( (cps=isinglist(search_list,log_rec.refer))==NULL) return; +@@ -1839,9 +1978,39 @@ + cp1=cp2+strlen(cp2)-1; + while (cp1!=cp2) if (isspace(*cp1)) *cp1--='\0'; else break; + ++ utf8=score_utf8(cp2); ++ sjis=score_sjis(cp2); ++ eucj=score_eucj(cp2); ++ if(utf8 >= sjis && utf8 >= eucj){ ++ iconv(cd_from_utf8, NULL, 0, NULL, 0); ++ cp3 = cp2; ++ inlen = strlen(cp2)+1; ++ cp1 = tmpbuf2; ++ outlen = sizeof(tmpbuf2); ++ if(iconv(cd_from_utf8, (char **)&cp3, &inlen, 
(char**)&cp1, &outlen) >= 0 && ++ inlen == 0){ ++ cp2 = tmpbuf2; ++ } ++ } ++ else if(sjis > utf8 && sjis > eucj){ ++ iconv(cd_from_sjis, NULL, 0, NULL, 0); ++ cp3 = cp2; ++ inlen = strlen(cp2)+1; ++ cp1 = tmpbuf2; ++ outlen = sizeof(tmpbuf2); ++ if(iconv(cd_from_sjis, (char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 && ++ inlen == 0){ ++ cp2 = tmpbuf2; ++ } ++ } ++ + /* strip invalid chars */ + cp1=cp2; +- while (*cp1!=0) { if ((*cp1<32)||(*cp1==127)) *cp1='_'; cp1++; } ++ while (*cp1!=0) { ++ if ((*cp1<32)||(*cp1==127)) *cp1='_'; ++ *cp1=tolower(*cp1); ++ cp1++; ++ } + + if (put_snode(cp2,(u_long)1,sr_htab)) + { === ended cut here === >Release-Note: >Audit-Trail: >Unformatted:
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200410170222.i9H2MZJx081612>