Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 17 Oct 2004 11:22:35 +0900 (JST)
From:      Fumihiko Kimura <jfkimura@yahoo.co.jp>
To:        FreeBSD-gnats-submit@FreeBSD.org
Cc:        dinoex@FreeBSD.org
Subject:   ports/72776: update ports: japanese/webalizer
Message-ID:  <200410170222.i9H2MZJx081612@sh0.radio.gr.jp>
Resent-Message-ID: <200410170230.i9H2UKm5079677@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help

>Number:         72776
>Category:       ports
>Synopsis:       update ports: japanese/webalizer
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-ports-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Sun Oct 17 02:30:20 GMT 2004
>Closed-Date:
>Last-Modified:
>Originator:     Fumihiko Kimura
>Release:        FreeBSD 4.10-RELEASE i386
>Organization:
>Environment:
>Description:

(Cc MAINTAINER:Dirk Meyer-san)
 
Japanese text can be encoded in several Kanji character codes: JIS, SJIS, EUC, and UTF-8.
Several of these codes coexist in the "Search Strings" that Webalizer reports.
I think most Japanese users work around this with Makefile.local.
This time I am submitting, via send-pr, the patch published by Dr. URASHIMA Akira.
The default behavior stays as it is.
To apply this patch, the user has to set a knob (WITH_WEBALIZER_CONV).
(I admit this feels a little half-hearted.)
In addition, I have added some settings to sample.conf that should be reasonably useful for Japanese users.

>How-To-Repeat:
>Fix:

=== begin  cut here ===
diff -urN webalizer-orig/Makefile webalizer/Makefile
--- webalizer-orig/Makefile	Sat Apr  5 06:32:28 2003
+++ webalizer/Makefile	Thu Oct 14 14:51:52 2004
@@ -9,8 +9,24 @@
 
 MAINTAINER=	dinoex@FreeBSD.org
 
+MASTERDIR=	${.CURDIR}/../../www/webalizer
 WEBALIZER_LANG=	japanese
-MASTERDIR?=	${.CURDIR}/../../www/webalizer
+
+# The patch file is written by URASHIMA Akira
+#       http://tyche.pu-toyama.ac.jp/~a-urasim/webalizer/webalizer-a-urasim_2.patch
+OPTIONS=                WEBALIZER_CONV "Use character code convert patch" off
+
+.if defined(WITH_WEBALIZER_CONV)
+CONFIGURE_ARGS+=	--enable-mininls
+CONFIGURE_ENV+=		LIBS="-L${LOCALBASE}/lib -liconv"
+CFLAGS+=		-I${PREFIX}/include
+.endif
+
+post-patch:
+.if defined(WITH_WEBALIZER_CONV)
+	@cd ${WRKSRC} && ${PATCH} < ${.CURDIR}/files/extra-webalizer-a-urasim_2.patch
+.endif
+	@cd ${WRKSRC} && ${PATCH} < ${.CURDIR}/files/extra-ja-webalizer.conf-dist.patch
 
 .if exists(${.CURDIR}/Makefile.local)
 .include "${.CURDIR}/Makefile.local"
diff -urN webalizer-orig/files/extra-ja-webalizer.conf-dist.patch webalizer/files/extra-ja-webalizer.conf-dist.patch
--- webalizer-orig/files/extra-ja-webalizer.conf-dist.patch	Thu Jan  1 09:00:00 1970
+++ webalizer/files/extra-ja-webalizer.conf-dist.patch	Thu Oct 14 11:49:08 2004
@@ -0,0 +1,67 @@
+--- sample.conf.orig	Fri Sep 29 12:51:42 2000
++++ sample.conf	Thu Oct 14 11:48:21 2004
+@@ -107,9 +107,12 @@
+ 
+ PageType	htm*
+ PageType	cgi
++#PageType	shtml
+ #PageType	phtml
+ #PageType	php3
++#PageType	php
+ #PageType	pl
++#PageType	rb
+ 
+ # UseHTTPS should be used if the analysis is being run on a
+ # secure server, and links to urls should use 'https://' instead
+@@ -153,6 +156,7 @@
+ # is 80 characters, so use multiple lines if needed.
+ 
+ #HTMLHead <META NAME="author" CONTENT="The Webalizer">
++HTMLHead <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=x-euc-jp">
+ 
+ # HTMLBody defined the HTML code to be inserted, starting with the
+ # <BODY> tag.  If not specified, the default is shown below.  If
+@@ -393,6 +397,9 @@
+ HideURL		*.png
+ HideURL		*.PNG
+ HideURL		*.ra
++HideURL		*.css
++HideURL		*.CSS
++HideURL		*.ico
+ 
+ # Hiding agents is kind of futile
+ #HideAgent	RealPlayer
+@@ -412,6 +419,11 @@
+ #GroupReferrer	excite.com/     Excite
+ #GroupReferrer	infoseek.com/   InfoSeek
+ #GroupReferrer	webcrawler.com/ WebCrawler
++#GroupReferrer	yahoo.co.jp/	Yahoo!Japan
++#GroupReferrer	google.co.jp/	GoogleJapan
++#GroupReferrer	infoseek.co.jp/	InfoSeekJapan
++#GroupReferrer	goo.ne.jp/	Goo
++#GroupReferrer	msn.co.jp/	MSNJapan
+ 
+ #GroupUser      root            Admin users
+ #GroupUser      admin           Admin users
+@@ -530,6 +542,21 @@
+ SearchEngine	mamma.com	query=
+ SearchEngine	alltheweb.com	query=
+ SearchEngine	northernlight.com  qr=
++
++SearchEngine	yahoo.co.jp	p=
++SearchEngine	google.co.jp	q=
++SearchEngine	infoseek.co.jp	qt=
++SearchEngine	msn.co.jp	q=
++# ocn
++SearchEngine	goo.ne.jp	MT=
++SearchEngine	biglobe.ne.jp	q=
++SearchEngine	nifty.com	Text=
++# so-net odn
++SearchEngine	excite.co.jp	search=
++SearchEngine	livedoor.com	q=
++SearchEngine	jp.aol.com	query=
++#SearchEngine	.google.	q=
++#SearchEngine	bulkfeeds.net	q=
+ 
+ # The Dump* keywords allow the dumping of Sites, URL's, Referrers
+ # User Agents, Usernames and Search strings to seperate tab delimited
diff -urN webalizer-orig/files/extra-webalizer-a-urasim_2.patch webalizer/files/extra-webalizer-a-urasim_2.patch
--- webalizer-orig/files/extra-webalizer-a-urasim_2.patch	Thu Jan  1 09:00:00 1970
+++ webalizer/files/extra-webalizer-a-urasim_2.patch	Thu Oct 14 11:32:39 2004
@@ -0,0 +1,241 @@
+--- webalizer.c.a-urasim	Wed Apr 17 07:11:31 2002
++++ webalizer.c	Tue Dec 23 23:26:23 2003
+@@ -39,6 +39,7 @@
+ #include <sys/utsname.h>
+ #include <sys/times.h>
+ #include <zlib.h>
++#include <iconv.h>
+ 
+ /* ensure getopt */
+ #ifdef HAVE_GETOPT_H
+@@ -224,6 +225,8 @@
+ char    *f_cp=f_buf+GZ_BUFSIZE;               /* pointer into the buffer  */
+ int     f_end;                                /* count to end of buffer   */ 
+ 
++iconv_t cd_from_sjis, cd_from_utf8;
++
+ /*********************************************/
+ /* MAIN - start here                         */
+ /*********************************************/
+@@ -526,6 +529,9 @@
+ 
+    start_time = times(&mytms);
+ 
++   cd_from_sjis = iconv_open("EUC-JP", "Shift_JIS");
++   cd_from_utf8 = iconv_open("EUC-JP", "UTF-8");
++
+    /*********************************************/
+    /* MAIN PROCESS LOOP - read through log file */
+    /*********************************************/
+@@ -1345,6 +1351,9 @@
+       if (dns_db) close_cache();
+ #endif
+ 
++      iconv_close(cd_from_sjis);
++      iconv_close(cd_from_utf8);
++
+       /* Whew, all done! Exit with completion status (0) */
+       exit(0);
+    }
+@@ -1773,6 +1782,23 @@
+ 
+    if (!str) return NULL;                       /* make sure strings valid */
+ 
++   while(*cp1){  /* for apache log's escape code. */
++     if(*cp1 == '\\' && *(cp1+1) == 'x' &&
++	isxdigit(*(cp1+2)) && isxdigit(*(cp1+3))){
++       *cp2 = from_hex(*(cp1+2))*16 + from_hex(*(cp1+3));
++       if ((*cp2<32)||(*cp2==127)) *cp2='_';
++       cp1+=4; cp2++;
++
++     }
++     else if(*cp1 == '\\' && *(cp1+1) == '\\'){
++       *cp2++='\\';
++       cp1+=2;
++     }
++     else *cp2++ = *cp1++;
++   }
++   *cp2=*cp1;
++
++   cp1=cp2=str;
+    while (*cp1)
+    {
+       if (*cp1=='%')                            /* Found an escape?        */
+@@ -1783,7 +1809,7 @@
+             if (*cp1) *cp2=from_hex(*cp1++)*16; /* convert hex to an ascii */
+             if (*cp1) *cp2+=from_hex(*cp1);     /* (hopefully) character   */
+             if ((*cp2<32)||(*cp2==127)) *cp2='_'; /* make '_' if its bad   */
+-            if (*cp1) cp2++; cp1++;
++            if (*cp1){ cp2++; cp1++;} /* bug? */
+          }
+          else *cp2++='%';
+       }
+@@ -1793,6 +1819,116 @@
+    return str;                                  /* return the string       */
+ }
+ 
++int score_eucj(unsigned char *str) /* Score how well str parses as EUC-JP text. */
++{ /* Returns -1 for NULL or any rejected byte; else +1 per ASCII byte, +2 per two-byte pair. */
++  int stat=0;  /* parser state: 0 = expect lead byte, 1 = expect 2nd kanji byte, 2 = expect kana byte */
++  int score=0; /* running score */
++  int bad=0;   /* set once a byte outside the accepted ranges is seen */
++  if(str==NULL) return -1;
++
++  for(; *str!=0;str++){
++    switch(stat){
++    case 0:
++      if(*str>= 0x20 && *str <= 0x7e) score++; // printable ASCII: +1
++      else if(*str >= 0xa1 && *str <= 0xfe) stat=1; // first byte of a two-byte kanji
++      else if(*str == 0x8f); // supplementary-kanji prefix: skipped, state stays 0
++      else if(*str == 0x8e) stat=2; // kana prefix: exactly one kana byte must follow
++      else if(*str < 0x20); // control character: tolerated, scores nothing
++      else bad=1;
++      break;
++    case 1:
++      if(*str >= 0xa1 && *str <= 0xfe) score += 2; // second kanji byte: +2 for the pair
++      else bad=1;
++      stat=0;
++      break;
++    case 2:
++      if(*str >= 0xa1 && *str <= 0xdf); // half-width kana byte: accepted, scores 0
++      else  bad=1;
++      stat=0;
++      break;
++    }
++  }
++  if(bad != 0) score = -1; // NOTE(review): a string ending mid-sequence (stat != 0) is not flagged
++  return score;
++}
++
++int score_sjis(unsigned char *str) /* Score how well str parses as Shift_JIS text. */
++{ /* Returns -1 for NULL or any rejected byte; else +1 per ASCII byte, +2 per two-byte character. */
++  int stat=0;  /* parser state: 0 = expect lead byte, 1 = expect trail byte */
++  int score=0; /* running score */
++  int bad=0;   /* set once a byte outside the accepted ranges is seen */
++  if(str==NULL) return -1;
++
++  for(; *str != 0; str++){
++    switch(stat){
++    case 0:
++      if(*str>= 0x20 && *str <= 0x7e) score++;// printable ASCII: +1
++      else if((*str >= 0x81 && *str <= 0x9f) ||
++	      (*str >= 0xe0 && *str <= 0xfc)) stat=1; // lead byte of a two-byte character
++      else if(*str >= 0xa1 && *str <= 0xdf); // single-byte kana: accepted, scores 0
++      else if(*str < 0x20); // control character: tolerated, scores nothing
++      else bad=1;
++      break;
++    case 1:
++      if((*str >= 0x40 && *str <= 0x7e) ||
++	 (*str >= 0x80 && *str <= 0xfc)) score += 2; // trail byte: +2 for the pair
++      else bad=1;
++      stat=0;
++      break;
++    }
++  }
++  if(bad != 0) score = -1; // NOTE(review): a string ending after a lead byte (stat != 0) is not flagged
++  return score;
++}
++
++int score_utf8(unsigned char *str) /* Score how well str parses as UTF-8 text. */
++{ /* Returns -1 for NULL or any rejected byte; else +1 per ASCII byte or 2-byte sequence, +3 per 3-byte, +4 per 4-byte. */
++  int stat=0;  /* parser state: 0 = expect lead byte; 1, 2-3 and 4-6 track 2-, 3- and 4-byte sequences */
++  int score=0; /* running score */
++  int bad=0;   /* set once a byte outside the accepted ranges is seen */
++  if(str==NULL) return -1;
++
++  for(; *str != 0; str++){
++    switch(stat){
++    case 0:
++      if(*str>= 0x20 && *str <= 0x7e) score++; // printable ASCII: +1
++      else if(*str >= 0xc0 && *str <= 0xdf) stat=1; // lead byte of a 2-byte sequence (Greek etc.)
++      else if(*str >= 0xe0 && *str <= 0xef) stat=2; // lead byte of a 3-byte sequence (kanji etc.)
++      else if(*str >= 0xf0 && *str <= 0xf7) stat=4; // lead byte of a 4-byte sequence
++      else if(*str < 0x20); // control character: tolerated, scores nothing
++      else bad=1;
++      break;
++    case 1:
++      if(*str >= 0x80 && *str <= 0xbf) score++; // continuation byte ends the 2-byte sequence: +1
++      else bad=1;
++      stat=0;
++      break;
++    case 2:
++      if(*str >= 0x80 && *str <= 0xbf) stat=3; // first continuation byte of a 3-byte sequence
++      else {bad=1; stat=0;}
++      break;
++    case 3:
++      if(*str >= 0x80 && *str <= 0xbf) score+=3; // last continuation byte of a 3-byte sequence: +3
++      else bad=1;
++      stat=0;
++      break;
++    case 4: // first continuation byte of a 4-byte sequence
++    case 5: // second continuation byte of a 4-byte sequence
++      if(*str >= 0x80 && *str <= 0xbf) stat++;
++      else {bad=1; stat=0;}
++      break;
++    case 6:
++      if(*str >= 0x80 && *str <= 0xbf) score+=4; // last continuation byte of a 4-byte sequence: +4
++      else bad=1;
++      stat=0;
++      break;
++    }
++  }
++  if(bad != 0) score = -1; // NOTE(review): a string ending mid-sequence (stat != 0) is not flagged
++  return score;
++}
++
++
+ /*********************************************/
+ /* SRCH_STRING - get search strings from ref */
+ /*********************************************/
+@@ -1804,6 +1940,10 @@
+    char srch[80]="";
+    unsigned char *cp1, *cp2, *cps;
+    int  sp_flg=0;
++   int sjis, eucj, utf8;
++   char tmpbuf2[BUFSIZE];
++   size_t inlen, outlen;
++   unsigned char *cp3;
+ 
+    /* Check if search engine referrer or return  */
+    if ( (cps=isinglist(search_list,log_rec.refer))==NULL) return; 
+@@ -1839,9 +1978,39 @@
+    cp1=cp2+strlen(cp2)-1;
+    while (cp1!=cp2) if (isspace(*cp1)) *cp1--='\0'; else break;
+ 
++   utf8=score_utf8(cp2);
++   sjis=score_sjis(cp2);
++   eucj=score_eucj(cp2);
++   if(utf8 >= sjis && utf8 >= eucj){
++     iconv(cd_from_utf8, NULL, 0, NULL, 0);
++     cp3 = cp2;
++     inlen = strlen(cp2)+1;
++     cp1 = tmpbuf2;
++     outlen = sizeof(tmpbuf2);
++     if(iconv(cd_from_utf8, (char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 &&
++	inlen == 0){
++       cp2 = tmpbuf2;
++     }
++   }
++   else if(sjis > utf8 && sjis > eucj){
++     iconv(cd_from_sjis, NULL, 0, NULL, 0);
++     cp3 = cp2;
++     inlen = strlen(cp2)+1;
++     cp1 = tmpbuf2;
++     outlen = sizeof(tmpbuf2);
++     if(iconv(cd_from_sjis, (char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 &&
++	inlen == 0){
++       cp2 = tmpbuf2;
++     }
++   }
++
+    /* strip invalid chars */
+    cp1=cp2;
+-   while (*cp1!=0) { if ((*cp1<32)||(*cp1==127)) *cp1='_'; cp1++; }
++   while (*cp1!=0) {
++     if ((*cp1<32)||(*cp1==127)) *cp1='_';
++     *cp1=tolower(*cp1);
++     cp1++;
++   }
+ 
+    if (put_snode(cp2,(u_long)1,sr_htab))
+    {
=== ended  cut here ===

>Release-Note:
>Audit-Trail:
>Unformatted:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200410170222.i9H2MZJx081612>