Date: Sun, 26 Dec 2010 19:53:22 +0200 From: Gleb Kurtsou <gleb.kurtsou@gmail.com> To: Eugene Grosbein <egrosbein@rdtc.ru> Cc: net@freebsd.org Subject: Re: lagg/lacp poor traffic distribution Message-ID: <20101226175322.GB45598@tops> In-Reply-To: <4D0CFEFF.3000902@rdtc.ru> References: <4D0CFEFF.3000902@rdtc.ru>
next in thread | previous in thread | raw e-mail | index | archive | help
--AhhlLboLdkugWU4S Content-Type: text/plain; charset=utf-8 Content-Disposition: inline On (19/12/2010 00:35), Eugene Grosbein wrote: > Hi! > > I've loaded router using two lagg interfaces in LACP mode. > lagg0 has IP address and two ports (em0 and em1) and carry untagged frames. > lagg1 has no IP address and has two ports (igb0 and igb1) and carry > about 1000 dot-q vlans with lots of hosts in each vlan. > > For lagg1, lagg distributes outgoing traffic over two ports just fine. > For lagg0 (untagged ethernet segment with only 2 MAC addresses) > less than 0.07% (54Mbit/s max) of traffic goes to em0 > and over 99.92% goes to em1, that's bad. > > That's general traffic of several thousands of customers surfing the web, > using torrents etc. I've glanced over lagg/lacp sources in src/sys/net/ > and found nothing suspicious, it should extract and use srcIP/dstIP for hash. > > How do I debug this problem? Could you try the attached patch? It changes the hash function for distributing traffic. Thanks, Gleb. > > Eugene Grosbein > _______________________________________________ > freebsd-net@freebsd.org mailing list > http://lists.freebsd.org/mailman/listinfo/freebsd-net > To unsubscribe, send any mail to "freebsd-net-unsubscribe@freebsd.org" --AhhlLboLdkugWU4S Content-Type: text/plain; charset=utf-8 Content-Disposition: attachment; filename="sfh-lagg.patch.txt" --- /dev/null +++ b/sys/sys/hash_sfh.h @@ -0,0 +1,89 @@ +/*- + * Copyright (c) 2010, Paul Hsieh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. My name, Paul Hsieh, and the names of any other contributors to + * the code use may not be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _SYS_HASH_SFH_H_ +#define _SYS_HASH_SFH_H_ +#include <sys/types.h> +/* Hash len bytes starting at buf, seeded with the caller-supplied value hash, and return the mixed 32-bit result. Returns 0 (the seed is discarded) when len is 0 or buf is NULL. Callers can chain calls by passing a previous result back in as hash. */ +static __inline uint32_t +hash_sfh_buf(const void *buf, size_t len, uint32_t hash) +{ + const uint8_t *data = buf; + uint32_t tmp; + int rem; + + if (len <= 0 || data == NULL) /* len is a size_t, so len <= 0 can only mean len == 0 */ + return (0); + + rem = len & 3; /* 0..3 bytes left over after the 4-byte groups */ + len >>= 2; /* from here on len counts whole 4-byte groups */ +/* get16bits assembles a 16-bit value from two single-byte reads, low byte first, so it needs no alignment and gives the same value on any host byte order. */ +#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ + +(uint32_t)(((const uint8_t *)(d))[0]) ) + + /* Main loop: mix in one 4-byte group (two 16-bit halves) per pass */ + for (;len > 0; len--) { + hash += get16bits(data); + tmp = (get16bits(data + 2) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + data += 2 * sizeof(uint16_t); + hash += hash >> 11; + } + + /* Handle end cases: mix in the 1-3 bytes the main loop did not consume; when rem is 0 no case matches and nothing more is mixed in */ + switch (rem) { + case 3: hash += get16bits(data); + hash ^= hash << 16; + hash ^= data[sizeof(uint16_t)] << 18; + hash += hash >> 11; + break; + case 2: hash += get16bits(data); + hash ^= hash << 11; + hash += hash >> 17; + break; + case 1: hash += *data; + hash ^= hash << 10; + hash += hash >> 1; + } +#undef get16bits + + /* Force "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + + return (hash); +} +#endif /* !_SYS_HASH_SFH_H_ */ --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -35,7 +35,7 @@ __FBSDID("$FreeBSD$"); #include <sys/priv.h> #include <sys/systm.h> #include <sys/proc.h> -#include <sys/hash.h> +#include <sys/hash_sfh.h> #include <sys/lock.h> #include <sys/rwlock.h> #include <sys/taskqueue.h> @@ -1414,19 +1414,19 @@ lagg_hashmbuf(struct mbuf *m, uint32_t key) goto out; eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); - p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, key); - p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p); + p = hash_sfh_buf(&eh->ether_shost, ETHER_ADDR_LEN, key); + p = hash_sfh_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p); /* Special handling for encapsulating VLAN frames */ if (m->m_flags & M_VLANTAG) { - p = 
hash32_buf(&m->m_pkthdr.ether_vtag, + p = hash_sfh_buf(&m->m_pkthdr.ether_vtag, sizeof(m->m_pkthdr.ether_vtag), p); } else if (etype == ETHERTYPE_VLAN) { vlan = lagg_gethdr(m, off, sizeof(*vlan), &vlanbuf); if (vlan == NULL) goto out; - p = hash32_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p); + p = hash_sfh_buf(&vlan->evl_tag, sizeof(vlan->evl_tag), p); etype = ntohs(vlan->evl_proto); off += sizeof(*vlan) - sizeof(*eh); } @@ -1438,8 +1438,8 @@ lagg_hashmbuf(struct mbuf *m, uint32_t key) if (ip == NULL) goto out; - p = hash32_buf(&ip->ip_src, sizeof(struct in_addr), p); - p = hash32_buf(&ip->ip_dst, sizeof(struct in_addr), p); + p = hash_sfh_buf(&ip->ip_src, sizeof(struct in_addr), p); + p = hash_sfh_buf(&ip->ip_dst, sizeof(struct in_addr), p); break; #endif #ifdef INET6 @@ -1448,10 +1448,10 @@ lagg_hashmbuf(struct mbuf *m, uint32_t key) if (ip6 == NULL) goto out; - p = hash32_buf(&ip6->ip6_src, sizeof(struct in6_addr), p); - p = hash32_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p); + p = hash_sfh_buf(&ip6->ip6_src, sizeof(struct in6_addr), p); + p = hash_sfh_buf(&ip6->ip6_dst, sizeof(struct in6_addr), p); flow = ip6->ip6_flow & IPV6_FLOWLABEL_MASK; - p = hash32_buf(&flow, sizeof(flow), p); /* IPv6 flow label */ + p = hash_sfh_buf(&flow, sizeof(flow), p); /* IPv6 flow label */ break; #endif } --AhhlLboLdkugWU4S--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20101226175322.GB45598>