Date: Wed, 27 Nov 2024 12:01:57 GMT From: Robert Clausecker <fuz@FreeBSD.org> To: ports-committers@FreeBSD.org, dev-commits-ports-all@FreeBSD.org, dev-commits-ports-main@FreeBSD.org Subject: git: 13e3d18eb6e7 - main - textproc/amberfish: update to 1.7.1, take maintainership Message-ID: <202411271201.4ARC1vNF069878@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by fuz: URL: https://cgit.FreeBSD.org/ports/commit/?id=13e3d18eb6e7f2a7771b4facdb72bf388ffd38e5 commit 13e3d18eb6e7f2a7771b4facdb72bf388ffd38e5 Author: Nassib Nassar <nrn@etymon.com> AuthorDate: 2024-11-25 15:22:37 +0000 Commit: Robert Clausecker <fuz@FreeBSD.org> CommitDate: 2024-11-27 11:54:52 +0000 textproc/amberfish: update to 1.7.1, take maintainership - chase to new upstream - always install man pages as per policy - license changed to MIT - submitter becomes maintainer - turn static REINPLACE_CMD use into patches Changelog: https://gitlab.com/amberfish/amberfish/-/releases PR: 282880 --- textproc/amberfish/Makefile | 64 ++- textproc/amberfish/distinfo | 5 +- textproc/amberfish/files/patch-Makefile | 26 ++ .../amberfish/files/patch-src_backend_Makefile.in | 10 + textproc/amberfish/files/porter.cc | 438 --------------------- textproc/amberfish/pkg-descr | 25 +- 6 files changed, 73 insertions(+), 495 deletions(-) diff --git a/textproc/amberfish/Makefile b/textproc/amberfish/Makefile index f433337d2a4e..840af7e07e78 100644 --- a/textproc/amberfish/Makefile +++ b/textproc/amberfish/Makefile @@ -1,54 +1,46 @@ PORTNAME= amberfish -PORTVERSION= 1.6.4 -PORTREVISION= 3 +DISTVERSIONPREFIX= v +DISTVERSION= 1.7.1 CATEGORIES= textproc databases -MASTER_SITES= SF/${PORTNAME}/Amberfish%20source%20-%20stable/${PORTVERSION} \ - http://etymon.com/software/amberfish/stable/ -MAINTAINER= ports@FreeBSD.org -COMMENT= General purpose text retrieval Software -WWW= https://web.archive.org/web/20100419215307/http://www.etymon.com/tr.html +MAINTAINER= nrn@etymon.com +COMMENT= Full-text search engine with command-line interface +WWW= https://gitlab.com/amberfish/amberfish -LICENSE= GPLv2 -LICENSE_FILE= ${WRKSRC}/COPYING +LICENSE= MIT +LICENSE_FILE= ${WRKSRC}/LICENSE LIB_DEPENDS= libxerces-c.so:textproc/xerces-c3 USES= gmake +USE_GITLAB= yes GNU_CONFIGURE= yes -GNU_CONFIGURE_MANPREFIX=${PREFIX}/share - -ALL_TARGET= all html - -PLIST_FILES= bin/af -PORTDOCS= * +ALL_TARGET= all + +PLIST_FILES= bin/af \ + share/man/man1/af.1.gz \ + share/man/man3/afclose.3.gz \ + share/man/man3/afgetresultmd.3.gz \ + share/man/man3/afopen.3.gz \ + share/man/man3/afsearch.3.gz \ + share/man/man3/afsortdocid.3.gz \ + share/man/man3/afsortscore.3.gz +PORTDOCS= amberfish.html OPTIONS_DEFINE= DOCS +DOCS_BUILD_DEPENDS= asciidoctor:textproc/rubygem-asciidoctor +DOCS_ALL_TARGET= html -DOCS_USES= makeinfo -DOCS_PLIST_FILES= share/man/man1/af.1.gz - -post-extract: - ${CP} ${FILESDIR}/porter.cc ${WRKSRC}/src - -post-patch: - @${REINPLACE_CMD} -e \ - 's|$${MAKEFLAGS} ||' ${WRKSRC}/Makefile - @${REINPLACE_CMD} -e \ - 's|cp |$${BSD_INSTALL_MAN} |' ${WRKSRC}/doc/Makefile.in - @${REINPLACE_CMD} -e \ - 's|-O3 |@CFLAGS@ | ; \ - s|make strip|| ; \ - s|cp |$${BSD_INSTALL_PROGRAM} |' ${WRKSRC}/src/Makefile.in +post-configure: + ${ECHO_CMD} "#define AF_VERSION \"v${DISTVERSION}\"" > ${WRKSRC}/src/backend/version.h + ${ECHO_CMD} v${DISTVERSION} > ${WRKSRC}/doc/version.adoc -post-patch-DOCS-off: - @${REINPLACE_CMD} -e \ - '/cd doc/d' ${WRKSRC}/Makefile +post-install: + ${STRIP_CMD} ${STAGEDIR}${PREFIX}/bin/af post-install-DOCS-on: - @${MKDIR} ${STAGEDIR}${DOCSDIR} - ${INSTALL_DATA} ${WRKSRC}/amberfish.png ${STAGEDIR}${DOCSDIR} - ${INSTALL_DATA} ${WRKSRC}/doc/html/*.html ${STAGEDIR}${DOCSDIR} + ${MKDIR} ${STAGEDIR}${DOCSDIR} + ${INSTALL_DATA} ${WRKSRC}/doc/amberfish.html ${STAGEDIR}${DOCSDIR} .include <bsd.port.mk> diff --git a/textproc/amberfish/distinfo b/textproc/amberfish/distinfo index d2192298a68c..d0660e389c00 100644 --- a/textproc/amberfish/distinfo +++ b/textproc/amberfish/distinfo @@ -1,2 +1,3 @@ -SHA256 (amberfish-1.6.4.tar.gz) = 155ac6e6b9b76fb7cbd94952548f718ab6add72c3b4fd2482d89abb39d96ce76 -SIZE (amberfish-1.6.4.tar.gz) = 127198 +TIMESTAMP = 1732616395 +SHA256 (amberfish-v1.7.1.tar.bz2) = 67c8b007be4652ceaafe0d93c9ac6ef40541e6163f820f8271d02704817af9a0 +SIZE (amberfish-v1.7.1.tar.bz2) = 117241 diff --git a/textproc/amberfish/files/patch-Makefile b/textproc/amberfish/files/patch-Makefile new file mode 100644 index 000000000000..6aefc6fb0f5d --- /dev/null +++ b/textproc/amberfish/files/patch-Makefile @@ -0,0 +1,26 @@ +--- Makefile.orig 2024-11-23 13:45:47 UTC ++++ Makefile +@@ -16,18 +16,18 @@ strip: + cd doc ; ${MAKE} html + + strip: +- cd src/backend ; ${MAKE} ${MAKEFLAGS} strip ++ cd src/backend ; ${MAKE} strip + # cd src/interface ; ${MAKE} ${MAKEFLAGS} strip + + install: +- cd src/backend ; ${MAKE} ${MAKEFLAGS} install ++ cd src/backend ; ${MAKE} install + # cd src/interface ; ${MAKE} ${MAKEFLAGS} install +- cd doc ; ${MAKE} ${MAKEFLAGS} install ++ cd doc ; ${MAKE} install + + uninstall: +- cd src/backend ; ${MAKE} ${MAKEFLAGS} uninstall ++ cd src/backend ; ${MAKE} uninstall + # cd src/interface ; ${MAKE} ${MAKEFLAGS} uninstall +- cd doc ; ${MAKE} ${MAKEFLAGS} uninstall ++ cd doc ; ${MAKE} uninstall + + clean: + rm -fr autom4te.cache diff --git a/textproc/amberfish/files/patch-src_backend_Makefile.in b/textproc/amberfish/files/patch-src_backend_Makefile.in new file mode 100644 index 000000000000..4059d7efe3d2 --- /dev/null +++ b/textproc/amberfish/files/patch-src_backend_Makefile.in @@ -0,0 +1,10 @@ +--- src/backend/Makefile.in.orig 2024-11-23 13:47:04 UTC ++++ src/backend/Makefile.in +@@ -62,7 +62,6 @@ install: all + strip ${AF} + + install: all +- make strip + mkdir -p ${PREFIXBIN} + cp ${BIN} ${PREFIXBIN}/. + diff --git a/textproc/amberfish/files/porter.cc b/textproc/amberfish/files/porter.cc deleted file mode 100644 index a997d88ef24c..000000000000 --- a/textproc/amberfish/files/porter.cc +++ /dev/null @@ -1,438 +0,0 @@ - -/* This is the Porter stemming algorithm, coded up in ANSI C by the - author. It may be be regarded as cononical, in that it follows the - algorithm presented in - - Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14, - no. 3, pp 130-137, - - only differing from it at the points maked --DEPARTURE-- below. - - See also http://www.tartarus.org/~martin/PorterStemmer - -The algorithm as described in the paper could be exactly replicated -by adjusting the points of DEPARTURE, but this is barely necessary, -because (a) the points of DEPARTURE are definitely improvements, and -(b) no encoding of the Porter stemmer I have seen is anything like -as exact as this version, even with the points of DEPARTURE! - -You can compile it on Unix with 'gcc -O3 -o stem stem.c' after which -'stem' takes a list of inputs and sends the stemmed equivalent to -stdout. - -The algorithm as encoded here is particularly fast. - -Release 1 -*/ - -#include <string.h> /* for memmove */ - -#define TRUE 1 -#define FALSE 0 - -/* The main part of the stemming algorithm starts here. b is a buffer - holding a word to be stemmed. The letters are in b[k0], b[k0+1] ... - ending at b[k]. In fact k0 = 0 in this demo program. k is readjusted - downwards as the stemming progresses. Zero termination is not in fact - used in the algorithm. - - Note that only lower case sequences are stemmed. Forcing to lower case - should be done before stem(...) is called. -*/ - -static char * b; /* buffer for word to be stemmed */ -static int k,k0,j; /* j is a general offset into the string */ - -/* cons(i) is TRUE <=> b[i] is a consonant. */ - -static int cons(int i) -{ - switch (b[i]) - { - case 'a': case 'e': case 'i': case 'o': case 'u': return FALSE; - case 'y': return (i==k0) ? TRUE : !cons(i-1); - default: return TRUE; - } -} - - -/* m() measures the number of consonant sequences between k0 and j. if c is - a consonant sequence and v a vowel sequence, and <..> indicates arbitrary - presence, - - <c><v> gives 0 - <c>vc<v> gives 1 - <c>vcvc<v> gives 2 - <c>vcvcvc<v> gives 3 - .... -*/ - -static int m() -{ - int n = 0; - int i = k0; - while(TRUE) - { - if (i > j) return n; - if (! cons(i)) break; i++; - } - i++; - while(TRUE) - { - while(TRUE) - { - if (i > j) return n; - if (cons(i)) break; - i++; - } - i++; - n++; - while(TRUE) - { - if (i > j) return n; - if (! cons(i)) break; - i++; - } - i++; - } -} - - -/* vowelinstem() is TRUE <=> k0,...j contains a vowel */ - -static int vowelinstem() -{ - int i; for (i = k0; i <= j; i++) if (! cons(i)) return TRUE; - return FALSE; -} - - -/* doublec(j) is TRUE <=> j,(j-1) contain a double consonant. */ - -static int doublec(int j) -{ - if (j < k0+1) return FALSE; - if (b[j] != b[j-1]) return FALSE; - return cons(j); -} - - -/* cvc(i) is TRUE <=> i-2,i-1,i has the form consonant - vowel - consonant - and also if the second c is not w,x or y. this is used when trying to - restore an e at the end of a short word. e.g. - - cav(e), lov(e), hop(e), crim(e), but - snow, box, tray. - -*/ - -static int cvc(int i) -{ - if (i < k0+2 || !cons(i) || cons(i-1) || !cons(i-2)) return FALSE; - { - int ch = b[i]; - if (ch == 'w' || ch == 'x' || ch == 'y') return FALSE; - } - return TRUE; -} - - -/* ends(s) is TRUE <=> k0,...k ends with the string s. */ - -static int ends(char * s) -{ - int length = s[0]; - if (s[length] != b[k]) return FALSE; /* tiny speed-up */ - if (length > k-k0+1) return FALSE; - if (memcmp(b+k-length+1,s+1,length) != 0) return FALSE; - j = k-length; - return TRUE; -} - - -/* setto(s) sets (j+1),...k to the characters in the string s, readjusting - k. */ - -static void setto(char * s) -{ - int length = s[0]; - memmove(b+j+1,s+1,length); - k = j+length; -} - - -/* r(s) is used further down. */ - -static void r(char * s) { if (m() > 0) setto(s); } - -/* step1ab() gets rid of plurals and -ed or -ing. e.g. - - caresses -> caress - ponies -> poni - ties -> ti - caress -> caress - cats -> cat - - feed -> feed - agreed -> agree - disabled -> disable - - matting -> mat - mating -> mate - meeting -> meet - milling -> mill - messing -> mess - - meetings -> meet - -*/ - -static void step1ab() -{ - if (b[k] == 's') - { - if (ends("\04" "sses")) k -= 2; else - if (ends("\03" "ies")) setto("\01" "i"); else - if (b[k-1] != 's') k--; - } - if (ends("\03" "eed")) { if (m() > 0) k--; } - else - if ((ends("\02" "ed") || ends("\03" "ing")) && vowelinstem()) - { - k = j; - if (ends("\02" "at")) setto("\03" "ate"); else - if (ends("\02" "bl")) setto("\03" "ble"); else - if (ends("\02" "iz")) setto("\03" "ize"); else - if (doublec(k)) - { - k--; - { - int ch = b[k]; - if (ch == 'l' || ch == 's' || ch == 'z') k++; - } - } - else if (m() == 1 && cvc(k)) setto("\01" "e"); - } -} - - -/* step1c() turns terminal y to i when there is another vowel in the stem. */ - -static void step1c() { if (ends("\01" "y") && vowelinstem()) b[k] = 'i'; } - -/* step2() maps double suffices to single ones. so -ization ( = -ize plus - -ation) maps to -ize etc. note that the string before the suffix must give - m() > 0. */ - -static void step2() -{ - switch (b[k-1]) - { - case 'a': if (ends("\07" "ational")) { r("\03" "ate"); break; } - if (ends("\06" "tional")) { r("\04" "tion"); break; } - break; - case 'c': if (ends("\04" "enci")) { r("\04" "ence"); break; } - if (ends("\04" "anci")) { r("\04" "ance"); break; } - break; - case 'e': if (ends("\04" "izer")) { r("\03" "ize"); break; } - break; - case 'l': if (ends("\03" "bli")) /*-DEPARTURE-*/ - { - r("\03" "ble"); break; - } - -/* To match the published algorithm, replace this line with - case 'l': if (ends("\04" "abli")) { r("\04" "able"); break; } */ - - if (ends("\04" "alli")) { r("\02" "al"); break; } - if (ends("\05" "entli")) { r("\03" "ent"); break; } - if (ends("\03" "eli")) { r("\01" "e"); break; } - if (ends("\05" "ousli")) { r("\03" "ous"); break; } - break; - case 'o': if (ends("\07" "ization")) { r("\03" "ize"); break; } - if (ends("\05" "ation")) { r("\03" "ate"); break; } - if (ends("\04" "ator")) { r("\03" "ate"); break; } - break; - case 's': if (ends("\05" "alism")) { r("\02" "al"); break; } - if (ends("\07" "iveness")) { r("\03" "ive"); break; } - if (ends("\07" "fulness")) { r("\03" "ful"); break; } - if (ends("\07" "ousness")) { r("\03" "ous"); break; } - break; - case 't': if (ends("\05" "aliti")) { r("\02" "al"); break; } - if (ends("\05" "iviti")) { r("\03" "ive"); break; } - if (ends("\06" "biliti")) { r("\03" "ble"); break; } - break; - case 'g': if (ends("\04" "logi")) /*-DEPARTURE-*/ - { - r("\03" "log"); break; - } - -/* To match the published algorithm, delete this line */ - - } -} - - -/* step3() deals with -ic-, -full, -ness etc. similar strategy to step2. */ - -static void step3() -{ - switch (b[k]) - { - case 'e': if (ends("\05" "icate")) { r("\02" "ic"); break; } - if (ends("\05" "ative")) { r("\00" ""); break; } - if (ends("\05" "alize")) { r("\02" "al"); break; } - break; - case 'i': if (ends("\05" "iciti")) { r("\02" "ic"); break; } - break; - case 'l': if (ends("\04" "ical")) { r("\02" "ic"); break; } - if (ends("\03" "ful")) { r("\00" ""); break; } - break; - case 's': if (ends("\04" "ness")) { r("\00" ""); break; } - break; - } -} - - -/* step4() takes off -ant, -ence etc., in context <c>vcvc<v>. */ - -static void step4() -{ - switch (b[k-1]) - { - case 'a': if (ends("\02" "al")) break; return; - case 'c': if (ends("\04" "ance")) break; - if (ends("\04" "ence")) break; return; - case 'e': if (ends("\02" "er")) break; return; - case 'i': if (ends("\02" "ic")) break; return; - case 'l': if (ends("\04" "able")) break; - if (ends("\04" "ible")) break; return; - case 'n': if (ends("\03" "ant")) break; - if (ends("\05" "ement")) break; - if (ends("\04" "ment")) break; - if (ends("\03" "ent")) break; return; - case 'o': if (ends("\03" "ion") && (b[j] == 's' || b[j] == 't')) break; - if (ends("\02" "ou")) break; return; -/* takes care of -ous */ - case 's': if (ends("\03" "ism")) break; return; - case 't': if (ends("\03" "ate")) break; - if (ends("\03" "iti")) break; return; - case 'u': if (ends("\03" "ous")) break; return; - case 'v': if (ends("\03" "ive")) break; return; - case 'z': if (ends("\03" "ize")) break; return; - default: return; - } - if (m() > 1) k = j; -} - - -/* step5() removes a final -e if m() > 1, and changes -ll to -l if - m() > 1. */ - -static void step5() -{ - j = k; - if (b[k] == 'e') - { - int a = m(); - if (a > 1 || a == 1 && !cvc(k-1)) k--; - } - if (b[k] == 'l' && doublec(k) && m() > 1) k--; -} - - -/* In stem(p,i,j), p is a char pointer, and the string to be stemmed is from - p[i] to p[j] inclusive. Typically i is zero and j is the offset to the last - character of a string, (p[j+1] == '\0'). The stemmer adjusts the - characters p[i] ... p[j] and returns the new end-point of the string, k. - Stemming never increases word length, so i <= k <= j. To turn the stemmer - into a module, declare 'stem' as extern, and delete the remainder of this - file. -*/ - -int stem(char * p, int i, int j) -{ /* copy the parameters into statics */ - b = p; k = j; k0 = i; - if (k <= k0+1) return k; /*-DEPARTURE-*/ - -/* With this line, strings of length 1 or 2 don't go through the - stemming process, although no mention is made of this in the - published algorithm. Remove the line to match the published - algorithm. */ - - step1ab(); step1c(); step2(); step3(); step4(); step5(); - return k; -} - - -/*--------------------stemmer definition ends here------------------------*/ - -#include <stdio.h> -#include <stdlib.h> /* for malloc, free */ -#include <ctype.h> /* for isupper, islower, tolower */ - -static char * s; /* a char * (=string) pointer; passed into b above */ - -#define INC 50 /* size units in which s is increased */ -static int i_max = INC; /* maximum offset in s */ - -void increase_s() -{ - i_max += INC; - { - char * new_s = (char *) malloc(i_max+1); - { /* copy across */ - int i; for (i = 0; i < i_max; i++) new_s[i] = s[i]; - } - free(s); s = new_s; - } -} - - -#define LETTER(ch) (isupper(ch) || islower(ch)) - -static void stemfile(FILE * f) -{ - while(TRUE) - { - int ch = getc(f); - if (ch == EOF) return; - if (LETTER(ch)) - { - int i = 0; - while(TRUE) - { - if (i == i_max) increase_s(); - - ch = tolower(ch); /* forces lower case */ - - s[i] = ch; i++; - ch = getc(f); - if (!LETTER(ch)) { ungetc(ch,f); break; } - } - s[stem(s,0,i-1)+1] = 0; -/* the previous line calls the stemmer and uses its result to - zero-terminate the string in s */ - printf("%s",s); - } - else putchar(ch); - } -} - -/* - * Commented out as required by amberfish's INSTALL file - * - int main(int argc, char * argv[]) - { - int i; - s = (char *) malloc(i_max+1); - for (i = 1; i < argc; i++) - { - FILE * f = fopen(argv[i],"r"); - if (f == 0) { fprintf(stderr,"File %s not found\n",argv[i]); exit(1); } - stemfile(f); - } - free(s); - return 0; - } -*/ diff --git a/textproc/amberfish/pkg-descr b/textproc/amberfish/pkg-descr index 07b00a78861a..1b305a38baea 100644 --- a/textproc/amberfish/pkg-descr +++ b/textproc/amberfish/pkg-descr @@ -1,19 +1,6 @@ -Amberfish is general purpose text retrieval software, developed at Etymon -by Nassib Nassar and distributed as open source software under the terms -of version 2 of the GNU General Public License (GPL). Its distinguishing -features are indexing/search of semi-structured text (i.e. both free tex -and multiply nested fields), built-in support for XML documents using the -Xerces library, structured queries allowing generalized field/tag paths, -hierarchical result sets (XML only), automatic searching across multiple -databases (allowing modular indexing), TREC format results, efficient -indexing, and relatively low memory requirements during indexing (and the -ability to index documents larger than available memory). Z39.50 support -is available. Other features include Boolean queries, right truncation, -phrase searching, relevance ranking, support for multiple documents per -file, incremental indexing, and easy integration with other UNIX tools, -The architecture is also designed to permit proximity queries; however, -they are not fully implemented at present. - -This port also includes the Porter stemming algorithm for suffix -stripping, available at: - http://www.tartarus.org/~martin/PorterStemmer +Amberfish is a full-text search engine with a command-line interface. +Its features include free-text and Boolean queries, relevance-ranked +results, wildcard search, phrase search, field search and structured +field path queries for XML, multiple documents per file and nested +documents, searching across multiple indexes, incremental update of +indexes, and low memory requirements for building indexes.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202411271201.4ARC1vNF069878>