Skip site navigation (1) Skip section navigation (2)
Date:      Sat, 8 Dec 2018 04:26:45 +0000 (UTC)
From:      Yuri Victorovich <yuri@FreeBSD.org>
To:        ports-committers@freebsd.org, svn-ports-all@freebsd.org, svn-ports-head@freebsd.org
Subject:   svn commit: r486909 - in head/textproc: . ucto ucto/files uctodata
Message-ID:  <201812080426.wB84QjZn044882@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: yuri
Date: Sat Dec  8 04:26:45 2018
New Revision: 486909
URL: https://svnweb.freebsd.org/changeset/ports/486909

Log:
  New ports: textproc/ucto, textproc/uctodata: Advanced rule-based (regular-expression) and unicode-aware tokenizer and its data port

Added:
  head/textproc/ucto/
  head/textproc/ucto/Makefile   (contents, props changed)
  head/textproc/ucto/distinfo   (contents, props changed)
  head/textproc/ucto/files/
  head/textproc/ucto/files/patch-config_Makefile.am   (contents, props changed)
  head/textproc/ucto/pkg-descr   (contents, props changed)
  head/textproc/ucto/pkg-plist   (contents, props changed)
  head/textproc/uctodata/
  head/textproc/uctodata/Makefile   (contents, props changed)
  head/textproc/uctodata/distinfo   (contents, props changed)
  head/textproc/uctodata/pkg-descr   (contents, props changed)
  head/textproc/uctodata/pkg-plist   (contents, props changed)
Modified:
  head/textproc/Makefile

Modified: head/textproc/Makefile
==============================================================================
--- head/textproc/Makefile	Sat Dec  8 00:48:00 2018	(r486908)
+++ head/textproc/Makefile	Sat Dec  8 04:26:45 2018	(r486909)
@@ -1789,6 +1789,8 @@
     SUBDIR += txt2man
     SUBDIR += txt2tags
     SUBDIR += uchardet
+    SUBDIR += ucto
+    SUBDIR += uctodata
     SUBDIR += uim
     SUBDIR += uim-el
     SUBDIR += uim-gtk

Added: head/textproc/ucto/Makefile
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/ucto/Makefile	Sat Dec  8 04:26:45 2018	(r486909)
@@ -0,0 +1,33 @@
+# $FreeBSD$
+
+PORTNAME=	ucto
+DISTVERSIONPREFIX=	v
+DISTVERSION=	0.14
+CATEGORIES=	textproc
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	Advanced rule-based (regular-expression) and unicode-aware tokenizer
+
+LICENSE=	APACHE20
+LICENSE_FILE=	${WRKSRC}/COPYING
+
+BUILD_DEPENDS=	autoconf-archive>0:devel/autoconf-archive \
+		uctodata>0:textproc/uctodata
+LIB_DEPENDS=	libexttextcat-2.0.so:textproc/libexttextcat \
+		libfolia.so:textproc/libfolia \
+		libicuio.so:devel/icu \
+		libomp.so:devel/openmp \
+		libticcutils.so:devel/ticcutils
+RUN_DEPENDS=	uctodata>0:textproc/uctodata
+
+USES=		autoreconf gmake gnome libedit libtool pkgconfig readline
+GNU_CONFIGURE=	yes
+CONFIGURE_ARGS=	--disable-static
+USE_GITHUB=	yes
+GH_ACCOUNT=	LanguageMachines
+USE_GNOME=	libxml2
+USE_LDCONFIG=	yes
+
+INSTALL_TARGET=	install-strip
+
+.include <bsd.port.mk>

Added: head/textproc/ucto/distinfo
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/ucto/distinfo	Sat Dec  8 04:26:45 2018	(r486909)
@@ -0,0 +1,3 @@
+TIMESTAMP = 1544204678
+SHA256 (LanguageMachines-ucto-v0.14_GH0.tar.gz) = ba40c28b0baba4eef98f88abc7c894a4b6fbaf153eaacd2ea3c9c177b0e85ea5
+SIZE (LanguageMachines-ucto-v0.14_GH0.tar.gz) = 350837

Added: head/textproc/ucto/files/patch-config_Makefile.am
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/ucto/files/patch-config_Makefile.am	Sat Dec  8 04:26:45 2018	(r486909)
@@ -0,0 +1,13 @@
+--- config/Makefile.am.orig	2018-12-08 03:11:07 UTC
++++ config/Makefile.am
+@@ -7,7 +7,7 @@ EXTRA_DIST = $(config_DATA)
+ install-data-hook:
+-	rm -f $(configdir)/textcat.cfg
++	rm -f $(DESTDIR)$(configdir)/textcat.cfg
+ if OLD_LM
+-	$(LN_S) $(configdir)/textcat_alt.cfg $(configdir)/textcat.cfg
++	cd $(DESTDIR)$(configdir) && $(LN_S) textcat_alt.cfg textcat.cfg
+ else
+-	$(LN_S) $(configdir)/textcat_normal.cfg $(configdir)/textcat.cfg
++	cd $(DESTDIR)$(configdir) && $(LN_S) textcat_normal.cfg textcat.cfg
+ endif

Added: head/textproc/ucto/pkg-descr
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/ucto/pkg-descr	Sat Dec  8 04:26:45 2018	(r486909)
@@ -0,0 +1,10 @@
+Ucto tokenizes text files: it separates words from punctuation, and splits
+sentences. It offers several other basic preprocessing steps, such as changing
+case, all of which you can use to make your text suited for further processing
+such as indexing, part-of-speech tagging, or machine translation.
+
+Ucto comes with tokenization rules for several languages and can be easily
+extended to suit other languages. It has been incorporated for tokenizing Dutch
+text in Frog, our Dutch morpho-syntactic processor.
+
+WWW: https://languagemachines.github.io/ucto/

Added: head/textproc/ucto/pkg-plist
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/ucto/pkg-plist	Sat Dec  8 04:26:45 2018	(r486909)
@@ -0,0 +1,12 @@
+bin/ucto
+include/ucto/my_textcat.h
+include/ucto/setting.h
+include/ucto/tokenize.h
+lib/libucto.so
+lib/libucto.so.3
+lib/libucto.so.3.0.0
+libdata/pkgconfig/ucto.pc
+man/man1/ucto.1.gz
+%%DATADIR%%/textcat.cfg
+%%DATADIR%%/textcat_alt.cfg
+%%DATADIR%%/textcat_normal.cfg

Added: head/textproc/uctodata/Makefile
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/uctodata/Makefile	Sat Dec  8 04:26:45 2018	(r486909)
@@ -0,0 +1,23 @@
+# $FreeBSD$
+
+PORTNAME=	uctodata
+DISTVERSIONPREFIX=	v
+DISTVERSION=	0.8
+CATEGORIES=	textproc
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	Datafiles for the tokenizer 'ucto'
+
+LICENSE=	APACHE20
+LICENSE_FILE=	${WRKSRC}/COPYING
+
+USES=		autoreconf gmake
+GNU_CONFIGURE=	yes
+USE_GITHUB=	yes
+GH_ACCOUNT=	LanguageMachines
+
+NO_ARCH=	yes
+
+DATADIR=	${PREFIX}/share/ucto
+
+.include <bsd.port.mk>

Added: head/textproc/uctodata/distinfo
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/uctodata/distinfo	Sat Dec  8 04:26:45 2018	(r486909)
@@ -0,0 +1,3 @@
+TIMESTAMP = 1544225721
+SHA256 (LanguageMachines-uctodata-v0.8_GH0.tar.gz) = a8e5e69696facbd2c2251406560762cf7f4817620179e4a8ee8d241cf0371a5e
+SIZE (LanguageMachines-uctodata-v0.8_GH0.tar.gz) = 37527

Added: head/textproc/uctodata/pkg-descr
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/uctodata/pkg-descr	Sat Dec  8 04:26:45 2018	(r486909)
@@ -0,0 +1,4 @@
+Datafiles for ucto, the rule-based tokenization package that is used to
+parse texts in different languages.
+
+WWW: https://languagemachines.github.io/ucto/

Added: head/textproc/uctodata/pkg-plist
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/textproc/uctodata/pkg-plist	Sat Dec  8 04:26:45 2018	(r486909)
@@ -0,0 +1,33 @@
+libdata/pkgconfig/uctodata.pc
+%%DATADIR%%/e-mail.rule
+%%DATADIR%%/exotic-eos.eos
+%%DATADIR%%/exotic-quotes.quote
+%%DATADIR%%/fra.abr
+%%DATADIR%%/fry.abr
+%%DATADIR%%/ligatures.filter
+%%DATADIR%%/nld_afk.abr
+%%DATADIR%%/por.abr
+%%DATADIR%%/rus.abr
+%%DATADIR%%/smiley.rule
+%%DATADIR%%/spa.abr
+%%DATADIR%%/standard-eos.eos
+%%DATADIR%%/standard-quotes.quote
+%%DATADIR%%/swe.abr
+%%DATADIR%%/tokconfig-deu
+%%DATADIR%%/tokconfig-eng
+%%DATADIR%%/tokconfig-fra
+%%DATADIR%%/tokconfig-fry
+%%DATADIR%%/tokconfig-generic
+%%DATADIR%%/tokconfig-ita
+%%DATADIR%%/tokconfig-nld
+%%DATADIR%%/tokconfig-nld-historical
+%%DATADIR%%/tokconfig-nld-sonarchat
+%%DATADIR%%/tokconfig-nld-twitter
+%%DATADIR%%/tokconfig-nld-withplaceholder
+%%DATADIR%%/tokconfig-por
+%%DATADIR%%/tokconfig-rus
+%%DATADIR%%/tokconfig-spa
+%%DATADIR%%/tokconfig-swe
+%%DATADIR%%/tokconfig-tur
+%%DATADIR%%/tur.abr
+%%DATADIR%%/url.rule



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201812080426.wB84QjZn044882>