Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 08 Mar 2004 17:51:38 +0100
From:      Oliver Eikemeier <eikemeier@fillmore-labs.com>
To:        FreeBSD-gnats-submit@FreeBSD.org
Subject:   ports/63933: [PATCH] bsd.port.mk: support for sorting MASTER_SITES by table lookup
Message-ID:  <404CA49A.5080208@fillmore-labs.com>
Resent-Message-ID: <200403081700.i28H0WD7058458@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help

>Number:         63933
>Category:       ports
>Synopsis:       [PATCH] bsd.port.mk: support for sorting MASTER_SITES by table lookup
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-ports-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Mon Mar 08 09:00:32 PST 2004
>Closed-Date:
>Last-Modified:
>Originator:     Oliver Eikemeier
>Release:        FreeBSD 4.9-STABLE i386
>Organization:
Fillmore Labs - http://www.fillmore-labs.com
>Environment:
System: FreeBSD nuuk.fillmore-labs.com 4.9-STABLE

>Description:

Currently there are four ways to influence the order of the sites a port tries
to fetch its distfiles from:

- reorder MASTER_SITES in the port's Makefile
- reorder MASTER_SITE_* in bsd.sites.mk
- add a specially formatted MASTER_SORT_REGEX in /etc/make.conf
- define RANDOMIZE_MASTER_SITES (which shuffles MASTER_SITE_OVERRIDE and
  MASTER_SITE_BACKUP too)

This patch adds support for a custom table, /var/db/distrank/ranks, consisting of
site (a host) - rank (a number) pairs. A port's MASTER_SITES are looked up in this
table and are sorted by their respective ranking. If a site isn't found in the table,
it is put at the end of the list. A small 'fuzz' factor is added to every value,
so that sites with numerically close rankings have a chance to be used alternately.
Currently, the results are sorted again by MASTER_SORT_REGEX.

The ranking has to be site-specific, and could be generated by downloading ISP-specific
tables or calculated using GeoIP. A skeleton for a port that measures latency is attached
to this PR to demonstrate the generation of a table, but is not an integral part of this
patch. I might make this into a port if the patch is committed.

>How-To-Repeat:
>Fix:

Index: bsd.port.mk
===================================================================
RCS file: /home/ncvs/ports/Mk/bsd.port.mk,v
retrieving revision 1.484
diff -u -r1.484 bsd.port.mk
--- bsd.port.mk	4 Feb 2004 04:27:04 -0000	1.484
+++ bsd.port.mk	8 Mar 2004 11:56:25 -0000
@@ -1938,6 +1938,25 @@
 FETCH_REGET?=	0
 .endif
 
+RANKFILE?=		/var/db/distrank/ranks
+
+.if exists(${RANKFILE})
+RANK_FUZZ?=		5
+RANK_HIGH?=		9999
+_RANK_SITES?=		| ${TR} -s ' \t' '\n' \
+			| ${AWK} 'BEGIN { \
+				IGNORECASE = 1; srand(); \
+				while(getline < "${RANKFILE}" > 0) { rank[tolower($$1)]=$$2 } \
+			} \
+			{ \
+				pos=rank[tolower(gensub(/^(f|ht)tp:\/\/([^ \/]+)\/.*/, "\\\2", 1))]; \
+				print $$0 "\t" url "\t" (pos ? pos + rand() * ${RANK_FUZZ} : ${RANK_HIGH}) \
+			}' \
+			| ${SORT} -n -k 2 | ${CUT} -f 1 | ${TR} '\n' ' '
+.else
+_RANK_SITES?=		''
+.endif
+
 .if defined(RANDOMIZE_MASTER_SITES)
 .if exists(/usr/games/random)
 RANDOM_CMD?=	/usr/games/random
@@ -2453,7 +2472,7 @@
 .			if !target(master-sites-${_group})
 SORTED_MASTER_SITES_${_group}_CMD=	cd ${.CURDIR} && ${MAKE} ${__softMAKEFLAGS} master-sites-${_group}
 master-sites-${_group}:
-	@${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_${_group}}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
+	@${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_${_group}}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
 .			endif
 .		endfor
 .	endif
@@ -2465,7 +2484,7 @@
 .			if !target(patch-sites-${_group})
 SORTED_PATCH_SITES_${_group}_CMD=	cd ${.CURDIR} && ${MAKE} ${__softMAKEFLAGS} patch-sites-${_group}
 patch-sites-${_group}:
-	@${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_${_group}}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
+	@${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_${_group}}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
 .			endif
 .		endfor
 .	endif
@@ -2498,14 +2517,14 @@
 .endfor
 
 master-sites-ALL:
-	@${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_ALL}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
+	@${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_ALL}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
 patch-sites-ALL:
-	@${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_ALL}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
+	@${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_ALL}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
 # has similar effect to old targets, i.e., access only {MASTER,PATCH}_SITES, not working with the new _n variables
 master-sites-DEFAULT:
-	@${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_DEFAULT}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
+	@${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_DEFAULT}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
 patch-sites-DEFAULT:
-	@${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_DEFAULT}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
+	@${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_DEFAULT}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
 
 # synonyms, mnemonics
 master-sites-all: master-sites-ALL
@@ -3140,7 +3159,7 @@
 					fi \
 				done; \
 				___MASTER_SITES_TMP= ; \
-				SORTED_MASTER_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__MASTER_SITES_TMP} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
+				SORTED_MASTER_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__MASTER_SITES_TMP} ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
 			else \
 				SORTED_MASTER_SITES_CMD_TMP="${SORTED_MASTER_SITES_DEFAULT_CMD}" ; \
 			fi ; \
@@ -3193,7 +3212,7 @@
 					fi \
 				done; \
 				___PATCH_SITES_TMP= ; \
-				SORTED_PATCH_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__PATCH_SITES_TMP} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
+				SORTED_PATCH_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__PATCH_SITES_TMP} ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
 			else \
 				SORTED_PATCH_SITES_CMD_TMP="${SORTED_PATCH_SITES_DEFAULT_CMD}" ; \
 			fi ; \
@@ -4133,7 +4152,7 @@
 					fi \
 				done; \
 				___MASTER_SITES_TMP= ; \
-				SORTED_MASTER_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__MASTER_SITES_TMP} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
+				SORTED_MASTER_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__MASTER_SITES_TMP} ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
 			else \
 				SORTED_MASTER_SITES_CMD_TMP="${SORTED_MASTER_SITES_DEFAULT_CMD}" ; \
 			fi ; \
@@ -4165,7 +4184,7 @@
 					fi \
 				done; \
 				___PATCH_SITES_TMP= ; \
-				SORTED_PATCH_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__PATCH_SITES_TMP} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
+				SORTED_PATCH_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__PATCH_SITES_TMP} ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
 			else \
 				SORTED_PATCH_SITES_CMD_TMP="${SORTED_PATCH_SITES_DEFAULT_CMD}" ; \
 			fi ; \

I guess before/after committing this patch, the sorting code should be refactored.
I volunteer if nobody else steps up.


Sample shell script to generate /var/db/distrank/ranks (requires port net/fping).

I am aware of the following issues:

- the script has a running time of over two hours; it could be faster if parallelized
- latency is a bad measure of download speed; bandwidth might be better
- some hosts do block ICMP echo packets
- network bandwidth changes over time

#!/bin/sh -e
#
# Copyright (c) 2004 Oliver Eikemeier. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#  1. Redistributions of source code must retain the above copyright notice
#     this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the author nor the names of its contributors may be
#    used to endorse or promote products derived from this software without
#    specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# $FreeBSD$
#

# Build ${DBDIR}/ranks: one "host<TAB>average round-trip time" line for every
# distinct host named in the MASTER_SITES / PATCH_SITES of the ports tree,
# sorted numerically by that time (lowest first).
#
# Environment:
#   DBDIR     directory that receives the table (default: /var/db/distrank)
#   PORTSDIR  root of the ports tree            (default: /usr/ports)
#
# Requires fping (port net/fping).

# The "-e" on the interpreter line is dropped when the script is started as
# "sh script", so request it here as well.
set -e

: "${DBDIR:=/var/db/distrank}"
: "${PORTSDIR:=/usr/ports}"

# Without fping the pipeline below would silently produce an empty table,
# because only the exit status of its last stage is looked at.
if ! command -v fping >/dev/null 2>&1; then
    echo "fping not found, please install port net/fping" >&2
    exit 1
fi

mkdir -p "${DBDIR}"

# Collect the results in a scratch file next to the final table and rename
# it into place at the very end, so that a run that takes hours (or is
# aborted) never leaves an empty or half-written ranks file behind.
tmpranks=$(mktemp "${DBDIR}/ranks.XXXXXX")
trap 'rm -f "${tmpranks}"' EXIT
trap 'exit 1' HUP INT TERM

# A literal tab, used as the column separator of the table.
tab=$(printf '\t')

echo "ranking ports MASTER_SITES"
echo "please be patient, this may take more than two hours"

cd "${PORTSDIR}"
CATEGORIES=$(make -VSUBDIR)

# ${CATEGORIES} and ${PORTS} are deliberately unquoted: they are
# whitespace-separated lists of directory names.
for category in ${CATEGORIES}; do
    if [ ! -d "${PORTSDIR}/${category}" ]; then continue; fi
    cd "${PORTSDIR}/${category}"
    PORTS=$(make -VSUBDIR)

    for port in ${PORTS}; do
        if [ ! -d "${PORTSDIR}/${category}/${port}" ]; then continue; fi
        cd "${PORTSDIR}/${category}/${port}"
        # Best effort: a port whose Makefile cannot be evaluated is skipped
        # instead of aborting the whole run.
        make -DFETCH_ALL -VMASTER_SITES -VPATCH_SITES makesum 2>/dev/null || true
    done
done \
    | tr -s ' \t' '\n' \
    | tr '[:upper:]' '[:lower:]' \
    | sed -nE 's;^(f|ht)tp://([^/]+)/.*$;\2;p' \
    | sort -u \
    | fping -q -c 3 2>&1 \
    | sed -nE "s;^([^ :/]+) *:.*min/avg/max += +[0-9.]+/([0-9.]+)/[0-9.]+.*\$;\\1${tab}\\2;p" \
    | sort -n -k 2 \
    > "${tmpranks}"

# Do not replace an existing table with an empty one.
if [ ! -s "${tmpranks}" ]; then
    echo "no host could be ranked, leaving ${DBDIR}/ranks untouched" >&2
    exit 1
fi

# mktemp creates the file readable by its owner only.
chmod a+r "${tmpranks}"
mv -f "${tmpranks}" "${DBDIR}/ranks"

>Release-Note:
>Audit-Trail:
>Unformatted:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?404CA49A.5080208>