Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 22 Jun 2019 04:33:53 +0000 (UTC)
From:      Yuri Victorovich <yuri@FreeBSD.org>
To:        ports-committers@freebsd.org, svn-ports-all@freebsd.org, svn-ports-head@freebsd.org
Subject:   svn commit: r504845 - in head/www: . colly
Message-ID:  <201906220433.x5M4XrCK037988@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: yuri
Date: Sat Jun 22 04:33:53 2019
New Revision: 504845
URL: https://svnweb.freebsd.org/changeset/ports/504845

Log:
  New port: www/colly: Elegant scraper and crawler framework for Golang

Added:
  head/www/colly/
  head/www/colly/Makefile   (contents, props changed)
  head/www/colly/distinfo   (contents, props changed)
  head/www/colly/pkg-descr   (contents, props changed)
Modified:
  head/www/Makefile

Modified: head/www/Makefile
==============================================================================
--- head/www/Makefile	Sat Jun 22 03:36:50 2019	(r504844)
+++ head/www/Makefile	Sat Jun 22 04:33:53 2019	(r504845)
@@ -90,6 +90,7 @@
     SUBDIR += cntlm
     SUBDIR += codeigniter
+    SUBDIR += colly
     SUBDIR += coppermine
     SUBDIR += cplanet
     SUBDIR += cppcms
     SUBDIR += cpr

Added: head/www/colly/Makefile
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/www/colly/Makefile	Sat Jun 22 04:33:53 2019	(r504845)
@@ -0,0 +1,50 @@
+# $FreeBSD$
+
+PORTNAME=	colly
+DISTVERSIONPREFIX=	v
+DISTVERSION=	1.2.0-32
+DISTVERSIONSUFFIX=	-gd360e4b
+CATEGORIES=	www
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	Elegant scraper and crawler framework for Golang
+
+LICENSE=	APACHE20
+LICENSE_FILE=	${WRKSRC}/LICENSE.txt
+
+USES=		go
+USE_GITHUB=	yes
+GH_ACCOUNT=	gocolly
+GH_TUPLE=	\
+		andybalholm:cascadia:680b6a5:cascadia/vendor/github.com/andybalholm/cascadia \
+		antchfx:htmlquery:v1.0.0:htmlquery/vendor/github.com/antchfx/htmlquery \
+		antchfx:xmlquery:v1.0.0:xmlquery/vendor/github.com/antchfx/xmlquery \
+		antchfx:xpath:v1.0.0:xpath/vendor/github.com/antchfx/xpath \
+		gobwas:glob:v0.2.3:glob/vendor/github.com/gobwas/glob \
+		golang:appengine:v1.6.1:appengine/vendor/google.golang.org/appengine \
+		golang:net:0ed95ab:net/vendor/golang.org/x/net \
+		golang:protobuf:v1.3.1:protobuf/vendor/github.com/golang/protobuf \
+		golang:text:v0.3.2:golang_text/vendor/golang.org/x/text \
+		jawher:mow.cli:v1.1.0:mow_cli/vendor/github.com/jawher/mow.cli \
+		kennygrant:sanitize:v1.2.4:sanitize/vendor/github.com/kennygrant/sanitize \
+		PuerkitoBio:goquery:v1.4.1:goquery/vendor/github.com/PuerkitoBio/goquery \
+		saintfish:chardet:3af4cd4:chardet/vendor/github.com/saintfish/chardet \
+		temoto:robotstxt:97ee4a9:robotstxt/vendor/github.com/temoto/robotstxt
+
+GO_PKGNAME=	github.com/${GH_ACCOUNT}/${PORTNAME}
+GO_TARGET=	${GO_PKGNAME}/cmd/${PORTNAME} \
+		${EXAMPLES_INSTALLED:S/^/${GO_PKGNAME}\/_examples\//}
+
+PLIST_FILES=	bin/${PORTNAME} \
+		${EXAMPLES_INSTALLED:S/^/bin\/${PORTNAME}-/}
+
+# Many examples of this scraping framework are interesting in themselves, so they are built via GO_TARGET and installed as bin/${PORTNAME}-<example>
+EXAMPLES_INSTALLED=	basic coursera_courses cryptocoinmarketcap factba.se hackernews_comments instagram openedx_courses reddit shopify_sitemap google_groups
+
+post-install:
+.for e in ${EXAMPLES_INSTALLED}
+	@cd ${STAGEDIR}${PREFIX}/bin && \
+		${MV} ${e} ${PORTNAME}-${e}
+.endfor
+
+.include <bsd.port.mk>

Added: head/www/colly/distinfo
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/www/colly/distinfo	Sat Jun 22 04:33:53 2019	(r504845)
@@ -0,0 +1,31 @@
+TIMESTAMP = 1561176363
+SHA256 (gocolly-colly-v1.2.0-32-gd360e4b_GH0.tar.gz) = abfa88e83ec4a222b5aad177617cef828c0673f40f82a62069e3959a80cd8c13
+SIZE (gocolly-colly-v1.2.0-32-gd360e4b_GH0.tar.gz) = 4119505
+SHA256 (antchfx-htmlquery-v1.0.0_GH0.tar.gz) = 99349026726dac354fba22559cb9fe3838177d0c4748c577734b1c120a31c3d1
+SIZE (antchfx-htmlquery-v1.0.0_GH0.tar.gz) = 5108
+SHA256 (antchfx-xmlquery-v1.0.0_GH0.tar.gz) = 389b92bd0b92b0f2a1c33e827847d0e7aa7de38497b52144984513431d0b3956
+SIZE (antchfx-xmlquery-v1.0.0_GH0.tar.gz) = 10379
+SHA256 (antchfx-xpath-v1.0.0_GH0.tar.gz) = 5772fc35984757fcd455aa024bcdb7402f76ca231234547896c5b8af9f9c929a
+SIZE (antchfx-xpath-v1.0.0_GH0.tar.gz) = 23845
+SHA256 (andybalholm-cascadia-680b6a5_GH0.tar.gz) = a5f9cc54b003ab93a012ff9e91c3f324e19997891096d1b426939729ea9bdf53
+SIZE (andybalholm-cascadia-680b6a5_GH0.tar.gz) = 13257
+SHA256 (gobwas-glob-v0.2.3_GH0.tar.gz) = 325026fc78bcebcf31151b6e060f4e1c3321b04ded3dab63b63610b323c10850
+SIZE (gobwas-glob-v0.2.3_GH0.tar.gz) = 25962
+SHA256 (golang-appengine-v1.6.1_GH0.tar.gz) = 1755aaf4c6246579337bf1bc4f834ef00c56216da87b20456863ca91985b5afa
+SIZE (golang-appengine-v1.6.1_GH0.tar.gz) = 333353
+SHA256 (golang-net-0ed95ab_GH0.tar.gz) = c47997a853b9e9accfb3192498e1b3333f6d000b6674fdfc9e22bd3675c4ff0f
+SIZE (golang-net-0ed95ab_GH0.tar.gz) = 929664
+SHA256 (golang-protobuf-v1.3.1_GH0.tar.gz) = 3f3a6123054a9847093c119895f1660612f301fe95358f3a6a1a33fd0933e6cf
+SIZE (golang-protobuf-v1.3.1_GH0.tar.gz) = 310884
+SHA256 (golang-text-v0.3.2_GH0.tar.gz) = 0b9309698f5708531c5377ab1e29b423a6d9e20c55a8d386c3b8283428212f22
+SIZE (golang-text-v0.3.2_GH0.tar.gz) = 7168069
+SHA256 (jawher-mow.cli-v1.1.0_GH0.tar.gz) = af18e157df5c23a1a4f05cb45fadef0a0a6f18717582656d42f6287d2bdc398a
+SIZE (jawher-mow.cli-v1.1.0_GH0.tar.gz) = 65838
+SHA256 (kennygrant-sanitize-v1.2.4_GH0.tar.gz) = cba9e054e07ba95b23d16b409eb7ac157e6be68c024e3fed0bbfef78524d4802
+SIZE (kennygrant-sanitize-v1.2.4_GH0.tar.gz) = 8922
+SHA256 (PuerkitoBio-goquery-v1.4.1_GH0.tar.gz) = 49962be8afaef664079868699c98b770728005df70c6751b91c55c86a1b3a86c
+SIZE (PuerkitoBio-goquery-v1.4.1_GH0.tar.gz) = 100484
+SHA256 (saintfish-chardet-3af4cd4_GH0.tar.gz) = 5701f2b44a796780d91243bcaa2b50b2e53a9a0b04940a049bff058cc54a6e06
+SIZE (saintfish-chardet-3af4cd4_GH0.tar.gz) = 267025
+SHA256 (temoto-robotstxt-97ee4a9_GH0.tar.gz) = 7b095477587cf720aa02cdba532a4a7838b656b8f4ed63dbfd97c07880565b7b
+SIZE (temoto-robotstxt-97ee4a9_GH0.tar.gz) = 14070

Added: head/www/colly/pkg-descr
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/www/colly/pkg-descr	Sat Jun 22 04:33:53 2019	(r504845)
@@ -0,0 +1,17 @@
+With Colly you can easily extract structured data from websites, which can be
+used for a wide range of applications, like data mining, data processing or
+archiving.
+
+Features:
+* Clean API
+* Fast (>1k request/sec on a single core)
+* Manages request delays and maximum concurrency per domain
+* Automatic cookie and session handling
+* Sync/async/parallel scraping
+* Distributed scraping
+* Caching
+* Automatic encoding of non-unicode responses
+* Robots.txt support
+* Google App Engine support
+
+WWW: http://go-colly.org/



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201906220433.x5M4XrCK037988>