Date: Sat, 8 Dec 2018 04:26:45 +0000 (UTC)
From: Yuri Victorovich <yuri@FreeBSD.org>
To: ports-committers@freebsd.org, svn-ports-all@freebsd.org, svn-ports-head@freebsd.org
Subject: svn commit: r486909 - in head/textproc: . ucto ucto/files uctodata
Message-ID: <201812080426.wB84QjZn044882@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: yuri
Date: Sat Dec 8 04:26:45 2018
New Revision: 486909
URL: https://svnweb.freebsd.org/changeset/ports/486909

Log:
  New ports: textproc/ucto, textproc/uctodata: Advanced rule-based (regular-expression) and unicode-aware tokenizer and its data port

Added:
  head/textproc/ucto/
  head/textproc/ucto/Makefile   (contents, props changed)
  head/textproc/ucto/distinfo   (contents, props changed)
  head/textproc/ucto/files/
  head/textproc/ucto/files/patch-config_Makefile.am   (contents, props changed)
  head/textproc/ucto/pkg-descr   (contents, props changed)
  head/textproc/ucto/pkg-plist   (contents, props changed)
  head/textproc/uctodata/
  head/textproc/uctodata/Makefile   (contents, props changed)
  head/textproc/uctodata/distinfo   (contents, props changed)
  head/textproc/uctodata/pkg-descr   (contents, props changed)
  head/textproc/uctodata/pkg-plist   (contents, props changed)
Modified:
  head/textproc/Makefile

Modified: head/textproc/Makefile
==============================================================================
--- head/textproc/Makefile	Sat Dec 8 00:48:00 2018	(r486908)
+++ head/textproc/Makefile	Sat Dec 8 04:26:45 2018	(r486909)
@@ -1789,6 +1789,8 @@
     SUBDIR += txt2man
     SUBDIR += txt2tags
     SUBDIR += uchardet
+    SUBDIR += ucto
+    SUBDIR += uctodata
     SUBDIR += uim
     SUBDIR += uim-el
     SUBDIR += uim-gtk

Added: head/textproc/ucto/Makefile
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/ucto/Makefile	Sat Dec 8 04:26:45 2018	(r486909)
@@ -0,0 +1,33 @@
+# $FreeBSD$
+
+PORTNAME=	ucto
+DISTVERSIONPREFIX=	v
+DISTVERSION=	0.14
+CATEGORIES=	textproc
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	Advanced rule-based (regular-expression) and unicode-aware tokenizer
+
+LICENSE=	APACHE20
+LICENSE_FILE=	${WRKSRC}/COPYING
+
+BUILD_DEPENDS=	autoconf-archive>0:devel/autoconf-archive \
+		uctodata>0:textproc/uctodata
+LIB_DEPENDS=	libexttextcat-2.0.so:textproc/libexttextcat \
+		libfolia.so:textproc/libfolia \
+		libicuio.so:devel/icu \
+		libomp.so:devel/openmp \
+		libticcutils.so:devel/ticcutils
+RUN_DEPENDS=	uctodata>0:textproc/uctodata
+
+USES=		autoreconf gmake gnome libedit libtool pkgconfig readline
+GNU_CONFIGURE=	yes
+CONFIGURE_ARGS=	--disable-static
+USE_GITHUB=	yes
+GH_ACCOUNT=	LanguageMachines
+USE_GNOME=	libxml2
+USE_LDCONFIG=	yes
+
+INSTALL_TARGET=	install-strip
+
+.include <bsd.port.mk>

Added: head/textproc/ucto/distinfo
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/ucto/distinfo	Sat Dec 8 04:26:45 2018	(r486909)
@@ -0,0 +1,3 @@
+TIMESTAMP = 1544204678
+SHA256 (LanguageMachines-ucto-v0.14_GH0.tar.gz) = ba40c28b0baba4eef98f88abc7c894a4b6fbaf153eaacd2ea3c9c177b0e85ea5
+SIZE (LanguageMachines-ucto-v0.14_GH0.tar.gz) = 350837

Added: head/textproc/ucto/files/patch-config_Makefile.am
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/ucto/files/patch-config_Makefile.am	Sat Dec 8 04:26:45 2018	(r486909)
@@ -0,0 +1,12 @@
+--- config/Makefile.am.orig	2018-12-08 03:11:07 UTC
++++ config/Makefile.am
+@@ -7,7 +7,7 @@ EXTRA_DIST = $(config_DATA)
+ install-data-hook:
+ 	rm -f $(configdir)/textcat.cfg
+ if OLD_LM
+-	$(LN_S) $(configdir)/textcat_alt.cfg $(configdir)/textcat.cfg
++	cd $(DESTDIR)$(configdir) && $(LN_S) textcat_alt.cfg textcat.cfg
+ else
+-	$(LN_S) $(configdir)/textcat_normal.cfg $(configdir)/textcat.cfg
++	cd $(DESTDIR)$(configdir) && $(LN_S) textcat_normal.cfg textcat.cfg
+ endif

Added: head/textproc/ucto/pkg-descr
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/ucto/pkg-descr	Sat Dec 8 04:26:45 2018	(r486909)
@@ -0,0 +1,10 @@
+Ucto tokenizes text files: it separates words from punctuation, and splits
+sentences. It offers several other basic preprocessing steps such as changing
+case that you can all use to make your text suited for further processing such
+as indexing, part-of-speech tagging, or machine translation.
+
+Ucto comes with tokenisation rules for several languages and can be easily
+extended to suit other languages. It has been incorporated for tokenizing Dutch
+text in Frog, our Dutch morpho-syntactic processor.
+
+WWW: https://languagemachines.github.io/ucto/

Added: head/textproc/ucto/pkg-plist
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/ucto/pkg-plist	Sat Dec 8 04:26:45 2018	(r486909)
@@ -0,0 +1,12 @@
+bin/ucto
+include/ucto/my_textcat.h
+include/ucto/setting.h
+include/ucto/tokenize.h
+lib/libucto.so
+lib/libucto.so.3
+lib/libucto.so.%%GTK3_VERSION%%
+libdata/pkgconfig/ucto.pc
+man/man1/ucto.1.gz
+%%DATADIR%%/textcat.cfg
+%%DATADIR%%/textcat_alt.cfg
+%%DATADIR%%/textcat_normal.cfg

Added: head/textproc/uctodata/Makefile
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/uctodata/Makefile	Sat Dec 8 04:26:45 2018	(r486909)
@@ -0,0 +1,23 @@
+# $FreeBSD$
+
+PORTNAME=	uctodata
+DISTVERSIONPREFIX=	v
+DISTVERSION=	0.8
+CATEGORIES=	textproc
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	Datafiles for the tokenizer 'ucto'
+
+LICENSE=	APACHE20
+LICENSE_FILE=	${WRKSRC}/COPYING
+
+USES=		autoreconf gmake
+GNU_CONFIGURE=	yes
+USE_GITHUB=	yes
+GH_ACCOUNT=	LanguageMachines
+
+NO_ARCH=	yes
+
+DATADIR=	${PREFIX}/share/ucto
+
+.include <bsd.port.mk>

Added: head/textproc/uctodata/distinfo
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/uctodata/distinfo	Sat Dec 8 04:26:45 2018	(r486909)
@@ -0,0 +1,3 @@
+TIMESTAMP = 1544225721
+SHA256 (LanguageMachines-uctodata-v0.8_GH0.tar.gz) = a8e5e69696facbd2c2251406560762cf7f4817620179e4a8ee8d241cf0371a5e
+SIZE (LanguageMachines-uctodata-v0.8_GH0.tar.gz) = 37527

Added: head/textproc/uctodata/pkg-descr
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/uctodata/pkg-descr	Sat Dec 8 04:26:45 2018	(r486909)
@@ -0,0 +1,4 @@
+Datafiles for ucto, the rule-based tokenization package that is used to
+parse texts in different languages.
+
+WWW: https://languagemachines.github.io/ucto/

Added: head/textproc/uctodata/pkg-plist
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/uctodata/pkg-plist	Sat Dec 8 04:26:45 2018	(r486909)
@@ -0,0 +1,33 @@
+libdata/pkgconfig/uctodata.pc
+%%DATADIR%%/e-mail.rule
+%%DATADIR%%/exotic-eos.eos
+%%DATADIR%%/exotic-quotes.quote
+%%DATADIR%%/fra.abr
+%%DATADIR%%/fry.abr
+%%DATADIR%%/ligatures.filter
+%%DATADIR%%/nld_afk.abr
+%%DATADIR%%/por.abr
+%%DATADIR%%/rus.abr
+%%DATADIR%%/smiley.rule
+%%DATADIR%%/spa.abr
+%%DATADIR%%/standard-eos.eos
+%%DATADIR%%/standard-quotes.quote
+%%DATADIR%%/swe.abr
+%%DATADIR%%/tokconfig-deu
+%%DATADIR%%/tokconfig-eng
+%%DATADIR%%/tokconfig-fra
+%%DATADIR%%/tokconfig-fry
+%%DATADIR%%/tokconfig-generic
+%%DATADIR%%/tokconfig-ita
+%%DATADIR%%/tokconfig-nld
+%%DATADIR%%/tokconfig-nld-historical
+%%DATADIR%%/tokconfig-nld-sonarchat
+%%DATADIR%%/tokconfig-nld-twitter
+%%DATADIR%%/tokconfig-nld-withplaceholder
+%%DATADIR%%/tokconfig-por
+%%DATADIR%%/tokconfig-rus
+%%DATADIR%%/tokconfig-spa
+%%DATADIR%%/tokconfig-swe
+%%DATADIR%%/tokconfig-tur
+%%DATADIR%%/tur.abr
+%%DATADIR%%/url.rule
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201812080426.wB84QjZn044882>