Skip site navigation (1) | Skip section navigation (2)
Date:      Mon, 23 Dec 1996 15:19:42 +1100 (EST)
From:      Julian Assange <proff@iq.org>
To:        danny@panda.hilink.com.au (Daniel O'Callaghan)
Cc:        hackers@freebsd.org
Subject:   Re: ipretard.c selective tcp/ip queues and throughput limiters
Message-ID:  <199612230419.PAA03748@profane.iq.org>
In-Reply-To: <Pine.BSF.3.91.961223091538.229V-100000@panda.hilink.com.au> from Daniel O'Callaghan at "Dec 23, 96 09:16:35 am"

next in thread | previous in thread | raw e-mail | index | archive | help

Written this morning. If people find it useful, I'll polish it.

example:

	# ipfw add divert 92 tcp from any to any 80 out via ed0
	# ./ipretard -v -t 1000/300 -w 2208

/* ipretard (c) 1996 Julian Assange (proff@suburbia.net) All Rights Reserved */

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <signal.h>
#include <errno.h>

#include <sys/socket.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

/*
 * MAX_IP: largest read requested from the divert socket for one packet
 * (65535 plus 100 extra bytes; presumably headroom -- the reason for the
 * extra 100 is not stated in this file).
 */
#define MAX_IP (65535+100)
/*
 * MAX_PACKET: bytes reserved for one queue slot, i.e. a struct packet whose
 * data[] area holds MAX_IP bytes.  The -1 accounts for the one byte of
 * data[] that sizeof(struct packet) already includes.
 */
#define MAX_PACKET (MAX_IP+sizeof(struct packet)-1)

/*
 * One queued packet.  Records are laid out back to back inside a single
 * malloc()ed buffer and linked into a list; the payload follows the header
 * directly, starting at data[0].
 */
struct packet {
	struct packet *next, *prev;	/* neighbours in the queue; NULL at either end */
	time_t time;			/* value of the global `ti` when the packet was received */
	int len;			/* payload length as returned by recvfrom() */
	struct sockaddr_in in;		/* address filled in by recvfrom(), handed back to sendto() */
	char data[1];			/* first byte of the payload; up to MAX_IP bytes are stored here */
};

/*
 * Program-wide state shared between main() and the SIGALRM handler.
 */
int holdoff_write=0;	/* set by sigalrm() while load_avg exceeds max_thru; main() stops sending while set */
int holdoffs=0;		/* number of ticks on which sigalrm() asserted holdoff_write */
int delay_drops=0;	/* packets discarded because they waited in the queue longer than the -d limit */
int window_changes=0;	/* packets whose TCP window was clamped to the -w limit */
int bytes_in=0;		/* total payload bytes received */
int bytes_out=0;	/* total payload bytes sent, accumulated from add_bytes_out by sigalrm() */
int add_bytes_out=0;	/* bytes sent since the last sigalrm() tick */
int packets_in=0;	/* total packets received */
int packets_out=0;	/* total packets sent */
int packets_queued=0;	/* packets currently held in the queue */
int bytes_queued=0;	/* queue occupancy in bytes, including the per-packet header */
double samples=60.0*5.0; /* 5 minute period */
double max_thru=0.0;	/* -t throughput limit; 0.0 disables the limiter in sigalrm() */
double load_avg;	/* decaying average of bytes sent per tick, maintained by sigalrm() */
time_t ti;		/* cached time(NULL), refreshed by sigalrm() */

int verbose=0;		/* incremented by -v; when non-zero sigalrm() prints statistics */

/*
 * SIGALRM handler, re-armed for one second each time it runs.
 *
 * When a throughput limit is configured (max_thru != 0.0) it folds the bytes
 * sent since the previous tick into the decaying average load_avg and turns
 * holdoff_write on or off depending on whether the average exceeds the limit.
 * It then moves add_bytes_out into the running total, optionally prints the
 * statistics line, and refreshes the cached clock `ti`.
 */
void
sigalrm(int i)
{
	int avg_in;
	int avg_out;

	if (max_thru!=0.0) {
		load_avg=load_avg*(1.0-1.0/samples)+add_bytes_out/samples;
		holdoff_write = (load_avg>max_thru) ? 1 : 0;
		if (holdoff_write)
			holdoffs++;
	}

	/* Fold this tick's output into the lifetime counter. */
	bytes_out+=add_bytes_out;
	add_bytes_out=0;

	if (verbose) {
		/* Average packet sizes; guard against division by zero. */
		avg_in = 0;
		if (packets_in)
			avg_in = bytes_in/packets_in;
		avg_out = 0;
		if (packets_out)
			avg_out = bytes_out/packets_out;
		printf("in: %d/%d %d avg "
		       "out: %d/%d %d avg "
		       "queued: %d/%d "
		       "drop: %d win_change: %d holdoffs: %d "
		       "loadavg %.2f\n",
		       bytes_in, packets_in, avg_in,
		       bytes_out, packets_out, avg_out,
		       bytes_queued, packets_queued,
		       delay_drops, window_changes, holdoffs,
		       load_avg);
	}

	ti=time(NULL);
#ifdef POSIX
	signal(SIGALRM, sigalrm);
#endif
	alarm(1);
}

u_short
fast_ip_gen_check(struct ip *ip)	/* well, for C anyway.. */
{
	register u_short *u = (u_short *)ip;
	register int sum = 0;
	u_char hl = ip->ip_hl * 4;

	sum += *u++;
	sum += *u++;
	sum += *u++;
	sum += *u++;
	sum += *u++;
	        u++;		/* skip checksum */
	sum += *u++;
	sum += *u++;
	sum += *u++;
	sum += *u++;
	if (hl != sizeof (*ip))
		for (hl = (hl - sizeof (*ip)) / 2; hl--;)
			sum += *u++;
	sum = (sum >> 16) + (sum & 0xffff);	/* fold carries */
	return (u_short) ~sum;
}


u_short
fast_tcp_gen_check(struct ip *ip)	/* well, for C anyway.. */
{
	register u_short *u;
	register int sum = 0;
	u_short len = ntohs(ip->ip_len) - ip->ip_hl * 4;
	u_char resprot[2] =
	{0, IPPROTO_TCP};

	u = (u_short *) &ip->ip_src;
	sum += *u++;
	sum += *u++;
	sum += *u++;
	sum += *u++;
	sum += *(u_short *) resprot;
	sum += htons(len);
	u = (u_short *) ((char*)ip + ip->ip_hl * 4);
	sum += *u++;
	sum += *u++;
	sum += *u++;
	sum += *u++;
	sum += *u++;
	sum += *u++;
	sum += *u++;
	sum += *u++;
                u++;    /* skip th_sum */
	sum += *u++;
	if (!(len -= sizeof (struct tcphdr)))
		 goto plainhdr;

	for (; len > 15; len -= 16) {
		sum += *u++;
		sum += *u++;
		sum += *u++;
		sum += *u++;
		sum += *u++;
		sum += *u++;
		sum += *u++;
		sum += *u++;
	}
	for (; len > 1; len -= 2)
		sum += *u++;
	if (len == 1)
		sum += *(u_char *) u;
      plainhdr:
	sum = (sum >> 16) + (sum & 0xffff);	/* fold carries */
	len = ~sum;
	return len;
}

/*
 * Print the command-line synopsis to stderr, using `av0` as the program
 * name, and terminate the process with exit status 1.
 */
void
usage(char *av0)
{
	fprintf(stderr,
	    "usage: %s [-b ring_buf_len][-d max_packet_delay][-p divert_port]"
	    "[-t max_throughput_per_sec/sample_period][-v][-w max_window]\n",
	    av0);
	exit(1);
}

/*
 * Report whether the MAX_PACKET-byte slot at `tail` can be filled without
 * overwriting a packet that is still queued.  `head` is the oldest queued
 * packet, or NULL when the queue is empty.
 *
 * Slots are always placed so that they fit before the end of the buffer, so
 * the only danger is running into `head` after the tail has wrapped around
 * to the start of the buffer (tail at or below head).
 */
static int
pq_slot_free(const struct packet *head, const struct packet *tail)
{
	if (!head)
		return 1;	/* nothing queued */
	if ((const char *)tail > (const char *)head)
		return 1;	/* every queued packet lies below the slot */
	return (const char *)tail + MAX_PACKET <= (const char *)head;
}

/*
 * ipretard: read packets from a divert(4) socket, hold them in a ring
 * buffer and write them back, optionally clamping the TCP window (-w),
 * pausing output while the average throughput exceeds a limit (-t) and
 * dropping packets that have waited longer than -d seconds.
 *
 * Options:
 *   -b bytes        size of the ring buffer (at least MAX_PACKET)
 *   -d seconds      maximum time a packet may wait in the queue
 *   -p port         divert port to bind to
 *   -t limit/period throughput limit and averaging period in seconds
 *   -v              print statistics once a second
 *   -w window       largest TCP window to let through
 *
 * Never returns normally; exits with status 1 on a fatal error.
 *
 * NOTE(review): add_bytes_out is updated here and reset in the SIGALRM
 * handler without blocking the signal, and packet slots are packed at
 * arbitrary byte offsets, so struct packet may be accessed unaligned.
 * Neither is addressed here.
 */
int
main(int argc, char **argv)
{
	int c;
	int fd;
	int div_port=92;
	int buf_len=256*1024; /* 256k */
	struct sockaddr_in in;
	fd_set fdr_set, fdw_set;
	int max_window=0;
	int max_ring_delay=5;
	struct packet *pq_base, *pq_head=NULL, *pq_last=NULL, *pq_tail;

	while ((c=getopt(argc, argv, "b:d:p:t:vw:"))!=-1)
		switch(c)
		{
		case 'b':
			buf_len=atoi(optarg);
			/* compare as int so that negative values are rejected too */
			if (buf_len<(int)MAX_PACKET) {
				fprintf(stderr, "%s: buf_len must be at least %d bytes\n", argv[0], (int)MAX_PACKET);
				exit(1);
			}
			break;
		case 'd':
			max_ring_delay=atoi(optarg);
			break;
		case 'p':
			div_port=atoi(optarg);
			break;
		case 't':
			{
				char *p=strchr(optarg, '/');
				if (!p) {
					fprintf(stderr, "%s: -t requires max_throughput/period e.g -t 5000/600 is 5k/s average measured over a 10 minute period\n", argv[0]);
					usage(argv[0]);
				}
				*p++='\0';
				max_thru=atof(optarg);
				samples=atoi(p);
				/* sigalrm() divides by samples; zero or less is meaningless */
				if (samples<1.0) {
					fprintf(stderr, "%s: -t period must be at least 1 second\n", argv[0]);
					usage(argv[0]);
				}
			}
			break;	/* was missing: -t used to fall through into -v */
		case 'v':
			verbose++;
			break;
		case 'w':
			max_window=atoi(optarg);
			break;
		default:
			usage(argv[0]);
		}
	if ((fd=socket(PF_INET, SOCK_RAW, IPPROTO_DIVERT))<0) {
		perror("socket");
		exit(1);
	}
	memset(&in, 0, sizeof in);
	in.sin_family=AF_INET;
	in.sin_port=htons(div_port);
	if (bind(fd, (struct sockaddr *)&in, sizeof in)!=0) {
		perror("bind");
		exit(1);
	}
	if (!(pq_tail=pq_base=malloc(buf_len))) {
		perror("malloc");
		exit(1);
	}
	fcntl(fd, F_SETFL, O_NONBLOCK|fcntl(fd, F_GETFL, 0));
	FD_ZERO(&fdr_set);
	FD_ZERO(&fdw_set);
	signal(SIGALRM, sigalrm);
	alarm(1);
	ti=time(NULL);
	for (;;) {
		int n;

		/* Read only while the next slot cannot clobber queued data. */
		if (pq_slot_free(pq_head, pq_tail))
			FD_SET(fd, &fdr_set);
		else
			FD_CLR(fd, &fdr_set);
		/* Write only when something is queued and output is not held off. */
		if (!holdoff_write && pq_head)
			FD_SET(fd, &fdw_set);
		else
			FD_CLR(fd, &fdw_set);
		n=select(fd+1, &fdr_set, &fdw_set, NULL, NULL);
		if (n==0)
			continue;
		if (n<0) {
			/* SIGALRM interrupts select() once a second */
			if (errno==EINTR)
				continue;
			perror("select");
			exit(1);
		}
		if (FD_ISSET(fd, &fdr_set)) {
			/* Drain the socket for as long as there is room. */
			while (pq_slot_free(pq_head, pq_tail)) {
				struct packet *pq=pq_tail;
				char *next;
				socklen_t in_len=sizeof pq->in;	/* value-result: reset per call */
				int cc;

				cc=recvfrom(fd, pq->data, MAX_IP, 0, (struct sockaddr*)&pq->in, &in_len);
				if (cc<=0)
					break;
				pq->len=cc;
				pq->time=ti;
				pq->next=NULL;
				if (pq_head) {
					pq->prev=pq_last;
					pq_last->next=pq;
				} else {
					pq->prev=NULL;
					pq_head=pq;
				}
				pq_last=pq;
				bytes_queued+=sizeof(struct packet)-1+cc;
				bytes_in+=cc;
				packets_queued++;
				packets_in++;

				/*
				 * Advance the tail in bytes (not in units of
				 * struct packet) and wrap to the start of the
				 * buffer when a full slot no longer fits.
				 */
				next=(char*)pq+sizeof(struct packet)-1+cc;
				if (next+MAX_PACKET>(char*)pq_base+buf_len)
					next=(char*)pq_base;
				pq_tail=(struct packet*)next;
			}
		}
		if (pq_head && FD_ISSET(fd, &fdw_set)) {
			int dequeue=0;

			if (ti-pq_head->time > max_ring_delay) {
				/* waited too long: discard without sending */
				delay_drops++;
				dequeue=1;
			} else {
				struct ip *ip=(struct ip*)pq_head->data;
				int sent;

				if (max_window &&
				    pq_head->len>=(int)(sizeof(struct ip)+sizeof(struct tcphdr))) {
					int hlen=ip->ip_hl*4;
					u_short len=ntohs(ip->ip_len);
					/*
					 * Only touch complete, unfragmented TCP
					 * packets whose TCP header really lies
					 * inside the received data (the IP header
					 * may carry options).
					 */
					if (ip->ip_p==IPPROTO_TCP && len==pq_head->len &&
					    hlen>=(int)sizeof(struct ip) &&
					    pq_head->len>=hlen+(int)sizeof(struct tcphdr) &&
					    !(ntohs(ip->ip_off)&(IP_MF|IP_OFFMASK))) {
						struct tcphdr *tcp=(struct tcphdr*)((char*)ip+hlen);
						if (ntohs(tcp->th_win)>max_window) {
							tcp->th_win=htons(max_window);
							tcp->th_sum=fast_tcp_gen_check(ip);
							window_changes++;
						}
					}
				}
				/* according to divert(4) diverts to incoming
				   need a valid ip_sum, while outgoing diverts
				   have their ip_sum recalculated by the ip stack */
				if (pq_head->in.sin_addr.s_addr!=INADDR_ANY)
					ip->ip_sum=fast_ip_gen_check(ip);

				sent=sendto(fd, pq_head->data, pq_head->len, 0, (struct sockaddr*)&pq_head->in, sizeof(pq_head->in));
				if (sent==pq_head->len) {
					packets_out++;
					add_bytes_out+=pq_head->len;
					dequeue=1;
				} else if (sent<0 && (errno==EAGAIN || errno==ENOBUFS)) {
					/* transient: keep the packet at the head and retry later */
				} else {
					/* errno is only meaningful when sendto() failed */
					if (sent<0)
						perror("sendto");
					else
						fprintf(stderr, "sendto: short write\n");
					dequeue=1;
				}
			}
			if (dequeue) {
				bytes_queued-=sizeof(struct packet)-1+pq_head->len;
				pq_head=pq_head->next;
				if (pq_head)
					pq_head->prev=NULL;
				packets_queued--;
			}
		}
	}
	/* NOTREACHED */
	exit(1);
}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199612230419.PAA03748>