Date: Mon, 08 Mar 2004 17:51:38 +0100 From: Oliver Eikemeier <eikemeier@fillmore-labs.com> To: FreeBSD-gnats-submit@FreeBSD.org Subject: ports/63933: [PATCH] bsd.port.mk: support for sorting MASTER_SITES by table lookup Message-ID: <404CA49A.5080208@fillmore-labs.com> Resent-Message-ID: <200403081700.i28H0WD7058458@freefall.freebsd.org>
next in thread | raw e-mail | index | archive | help
>Number: 63933 >Category: ports >Synopsis: [PATCH] bsd.port.mk: support for sorting MASTER_SITES by table lookup >Confidential: no >Severity: non-critical >Priority: low >Responsible: freebsd-ports-bugs >State: open >Quarter: >Keywords: >Date-Required: >Class: change-request >Submitter-Id: current-users >Arrival-Date: Mon Mar 08 09:00:32 PST 2004 >Closed-Date: >Last-Modified: >Originator: Oliver Eikemeier >Release: FreeBSD 4.9-STABLE i386 >Organization: Fillmore Labs - http://www.fillmore-labs.com >Environment: System: FreeBSD nuuk.fillmore-labs.com 4.9-STABLE >Description: Currently there are four ways to influence the order of the sites a port tries to fetch its distfiles from: - reorder MASTER_SITES in the port's Makefile - reorder MASTER_SITE_* in bsd.sites.mk - add a specially formatted MASTER_SORT_REGEX in /etc/make.conf - define RANDOMIZE_MASTER_SITES (which shuffles MASTER_SITE_OVERRIDE and MASTER_SITE_BACKUP too) This patch adds support for a custom table, /var/db/distrank/ranks, consisting of site (a host) - rank (a number) pairs. A port's MASTER_SITES are looked up in this table and are sorted by their respective rankings. If a site isn't found in the table, it is put at the end of the list. A small 'fuzz' factor is added to every value, so that sites with numerically close rankings have a chance of being used alternately. Currently, the results are sorted again by MASTER_SORT_REGEX. The ranking has to be site-specific, and could be generated by downloading ISP-specific tables or calculated using GeoIP. A skeleton for a port that measures latency is attached to this PR to demonstrate the generation of a table, but is not an integral part of this patch. I might make this into a port if the patch is committed. 
>How-To-Repeat: >Fix: Index: bsd.port.mk =================================================================== RCS file: /home/ncvs/ports/Mk/bsd.port.mk,v retrieving revision 1.484 diff -u -r1.484 bsd.port.mk --- bsd.port.mk 4 Feb 2004 04:27:04 -0000 1.484 +++ bsd.port.mk 8 Mar 2004 11:56:25 -0000 @@ -1938,6 +1938,25 @@ FETCH_REGET?= 0 .endif +RANKFILE?= /var/db/distrank/ranks + +.if exists(${RANKFILE}) +RANK_FUZZ?= 5 +RANK_HIGH?= 9999 +_RANK_SITES?= | ${TR} -s ' \t' '\n' \ + | ${AWK} 'BEGIN { \ + IGNORECASE = 1; \ + while(getline < "${RANKFILE}" > 0) { rank[tolower($$1)]=$$2 } \ + } \ + { \ + pos=rank[tolower(gensub(/^(f|ht)tp:\/\/([^ \/]+)\/.*/, "\\\2", 1))]; \ + print $$0 "\t" url "\t" (pos ? pos + rand() * ${RANK_FUZZ} : ${RANK_HIGH}) \ + }' \ + | ${SORT} -n -k 2 | ${CUT} -f 1 | ${TR} '\n' ' ' +.else +_RANK_SITES?= '' +.endif + .if defined(RANDOMIZE_MASTER_SITES) .if exists(/usr/games/random) RANDOM_CMD?= /usr/games/random @@ -2453,7 +2472,7 @@ . if !target(master-sites-${_group}) SORTED_MASTER_SITES_${_group}_CMD= cd ${.CURDIR} && ${MAKE} ${__softMAKEFLAGS} master-sites-${_group} master-sites-${_group}: - @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_${_group}}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP} + @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_${_group}}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP} . endif . endfor . endif @@ -2465,7 +2484,7 @@ . if !target(patch-sites-${_group}) SORTED_PATCH_SITES_${_group}_CMD= cd ${.CURDIR} && ${MAKE} ${__softMAKEFLAGS} patch-sites-${_group} patch-sites-${_group}: - @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_${_group}}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP} + @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_${_group}}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP} . endif . endfor . 
endif @@ -2498,14 +2517,14 @@ .endfor master-sites-ALL: - @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_ALL}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP} + @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_ALL}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP} patch-sites-ALL: - @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_ALL}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP} + @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_ALL}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP} # has similar effect to old targets, i.e., access only {MASTER,PATCH}_SITES, not working with the new _n variables master-sites-DEFAULT: - @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_DEFAULT}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP} + @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_DEFAULT}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP} patch-sites-DEFAULT: - @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_DEFAULT}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP} + @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_DEFAULT}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP} # synonyms, mnemonics master-sites-all: master-sites-ALL @@ -3140,7 +3159,7 @@ fi \ done; \ ___MASTER_SITES_TMP= ; \ - SORTED_MASTER_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__MASTER_SITES_TMP} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \ + SORTED_MASTER_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__MASTER_SITES_TMP} ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \ else \ SORTED_MASTER_SITES_CMD_TMP="${SORTED_MASTER_SITES_DEFAULT_CMD}" ; \ fi 
; \ @@ -3193,7 +3212,7 @@ fi \ done; \ ___PATCH_SITES_TMP= ; \ - SORTED_PATCH_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__PATCH_SITES_TMP} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \ + SORTED_PATCH_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__PATCH_SITES_TMP} ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \ else \ SORTED_PATCH_SITES_CMD_TMP="${SORTED_PATCH_SITES_DEFAULT_CMD}" ; \ fi ; \ @@ -4133,7 +4152,7 @@ fi \ done; \ ___MASTER_SITES_TMP= ; \ - SORTED_MASTER_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__MASTER_SITES_TMP} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \ + SORTED_MASTER_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__MASTER_SITES_TMP} ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \ else \ SORTED_MASTER_SITES_CMD_TMP="${SORTED_MASTER_SITES_DEFAULT_CMD}" ; \ fi ; \ @@ -4165,7 +4184,7 @@ fi \ done; \ ___PATCH_SITES_TMP= ; \ - SORTED_PATCH_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__PATCH_SITES_TMP} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \ + SORTED_PATCH_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__PATCH_SITES_TMP} ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \ else \ SORTED_PATCH_SITES_CMD_TMP="${SORTED_PATCH_SITES_DEFAULT_CMD}" ; \ fi ; \ I guess before/after committing this port, the sorting code should be refactored. I volunteer if nobody else steps up. Sample shell script to generate /var/db/distrank/ranks (requires port net/fping). 
I am aware of the following issues: - the script has a running time of over two hours; it could be faster if parallelized - latency is a bad measure of download speed; bandwidth might be better - some hosts block ICMP echo packets - network bandwidth changes over time #!/bin/sh -e # # Copyright (c) 2004 Oliver Eikemeier. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # 1. Redistributions of source code must retain the above copyright notice # this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. Neither the name of the author nor the names of its contributors may be # used to endorse or promote products derived from this software without # specific prior written permission. # # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY # AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # $FreeBSD$ # if [ ! 
-d "${DBDIR:=/var/db/distrank}" ]; then mkdir -p "${DBDIR}" fi echo "ranking ports MASTER_SITES" echo "please be patient, this may take more than two hours" cd "${PORTSDIR:=/usr/ports}" CATEGORIES=`make -VSUBDIR` for category in ${CATEGORIES}; do if [ ! -d "${PORTSDIR}/${category}" ]; then continue; fi cd "${PORTSDIR}/${category}" PORTS=`make -VSUBDIR` for port in ${PORTS}; do if [ ! -d "${PORTSDIR}/${category}/${port}" ]; then continue; fi cd "${PORTSDIR}/${category}/${port}" make -DFETCH_ALL -VMASTER_SITES -VPATCH_SITES makesum 2>/dev/null || true done done \ | tr -s ' \t' '\n' \ | tr '[:upper:]' '[:lower:]' \ | sed -nE 's;^(f|ht)tp://([^/]+)/.*$;\2;p' \ | sort -u \ | fping -q -c 3 2>&1 \ | sed -nE 's;^([^ :/]+) *:.*min/avg/max += +[0-9.]+/([0-9.]+)/[0-9.]+.*$;\1 \2;p' \ | sort -n -k 2 \ > "${DBDIR}/ranks" chmod a+r "${DBDIR}/ranks" >Release-Note: >Audit-Trail: >Unformatted:
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?404CA49A.5080208>