Date: Thu, 24 Jan 2008 19:08:25 +0100 From: "Felix J. Ogris" <fjo-lists@ogris.de> To: <freebsd-net@freebsd.org> Subject: carp(4) ip loadbalancing (patch included) Message-ID: <C3BE92AA.BF4E1%fjo-lists@ogris.de>
next in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
Hi,
I have extended ip_carp.c to provide load balancing on a per-IP basis, e.g. to
set up an active/active cluster. The algorithm is quite simple. Each cluster
consists of N nodes. If an IPv4/IPv6 packet reaches node X, then it
evaluates X == source_address_of_IP_packet mod N. If this is true, then the
node will process that packet (carp_forus() returns 1). I had to introduce a
new carp type (#define CARP_HELO 0x00) which by default is multicast every
3 seconds (advertising base). Each node maintains a list of its IPv4 and
IPv6 neighbours. Neighbours are timed out after 3 * advbase seconds. Network
layer protocols other than IPv4/IPv6 are handled by the carp master. TCP
packets that have their ACK flag set or their SYN flag not set are
checked for an entry in the syncache or in tcbinfo. If no entry is found,
then that TCP packet will not be handled by the current node (carp_forus()
returns 0). This guarantees that an established TCP connection will stick to
a host, even if a dead node reenters the cluster.
I have put my patch (against 6.3-RELEASE) under the same license as the
FreeBSD kernel itself. So feel free to apply it to the official tree - or to
punish me for my lousy work :-)
Regards,
Felix
[-- Attachment #2 --]
diff -dru sys0/net/if_bridge.c sys/net/if_bridge.c
--- sys0/net/if_bridge.c 2007-12-21 05:30:47.000000000 +0000
+++ sys/net/if_bridge.c 2008-01-23 23:29:02.000000000 +0000
@@ -2176,10 +2176,10 @@
#ifdef DEV_CARP
# define OR_CARP_CHECK_WE_ARE_DST(iface) \
|| ((iface)->if_carp \
- && carp_forus((iface)->if_carp, eh->ether_dhost))
+ && carp_forus((iface)->if_carp, eh->ether_dhost, m))
# define OR_CARP_CHECK_WE_ARE_SRC(iface) \
|| ((iface)->if_carp \
- && carp_forus((iface)->if_carp, eh->ether_shost))
+ && carp_forus((iface)->if_carp, eh->ether_shost, m))
#else
# define OR_CARP_CHECK_WE_ARE_DST(iface)
# define OR_CARP_CHECK_WE_ARE_SRC(iface)
diff -dru sys0/net/if_ethersubr.c sys/net/if_ethersubr.c
--- sys0/net/if_ethersubr.c 2007-09-17 17:50:49.000000000 +0000
+++ sys/net/if_ethersubr.c 2008-01-23 23:29:29.000000000 +0000
@@ -662,7 +662,7 @@
* evaluation, to see if the carp ether_dhost values break any
* of these checks!
*/
- if (ifp->if_carp && carp_forus(ifp->if_carp, eh->ether_dhost))
+ if (ifp->if_carp && carp_forus(ifp->if_carp, eh->ether_dhost, m))
goto pre_stats;
#endif
/*
diff -dru sys0/netinet/ip_carp.c sys/netinet/ip_carp.c
--- sys0/netinet/ip_carp.c 2007-06-06 16:20:50.000000000 +0000
+++ sys/netinet/ip_carp.c 2008-01-23 23:22:12.000000000 +0000
@@ -3,6 +3,7 @@
/*
* Copyright (c) 2002 Michael Shalayeff. All rights reserved.
* Copyright (c) 2003 Ryan McBride. All rights reserved.
+ * Copyright (c) 2008 Felix J. Ogris. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -88,6 +89,32 @@
static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
SYSCTL_DECL(_net_inet_carp);
+/* iplb
+ * Declarations for the IP load-balancing extension: the peer record kept
+ * per cluster node, the two tunables, and the helpers referenced before
+ * their definition.
+ */
+#include <netinet/in_pcb.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#endif
+#include <netinet/tcp_var.h>
+#define CARP_HELO 0x00 /* carp_type value carried by helo packets */
+struct carp_peer { /* one known cluster node */
+ struct {
+ sa_family_t sa_family; /* AF_INET or AF_INET6 */
+ char sa_data[16]; /* raw address bytes: 4 used for IPv4, 16 for IPv6 */
+ } cp_sa;
+ struct timeval cp_tv; /* time the peer was last seen */
+ TAILQ_ENTRY(carp_peer) cp_list; /* linkage in sc_peers or sc_peers6 */
+};
+TAILQ_HEAD(carp_peer_list, carp_peer);
+static void carp_iplb_cleanup(void*); /* periodic callout, see carp_clone_create() */
+static int carp_iplb_enable = 0; /* 0: helo sending, peer tracking and balancing are skipped */
+static int carp_iplb_mask = 32; /* carp_iplb_forus() shifts the 32-bit source value right by 32 - mask */
+SYSCTL_INT(_net_inet_carp, OID_AUTO, ip_balance, CTLFLAG_RW,
+ &carp_iplb_enable, 0, "balance IP responses");
+SYSCTL_INT(_net_inet_carp, OID_AUTO, ip_balance_mask, CTLFLAG_RW,
+ &carp_iplb_mask, 0, "IP stickyness mask"); /* values outside 0..32 are reset to 32 by carp_iplb_forus() */
+extern struct syncache *syncache_lookup(struct in_conninfo*, struct syncache_head**); /* used by carp_iplb_forus(); NOTE(review): confirm the symbol is exported by tcp_syncache.c */
+/* iplb */
+
struct carp_softc {
struct ifnet *sc_ifp; /* Interface clue */
struct ifnet *sc_carpdev; /* Pointer to parent interface */
@@ -128,6 +155,20 @@
struct callout sc_md6_tmo; /* master down timeout */
LIST_ENTRY(carp_softc) sc_next; /* Interface clue */
+
+ /* iplb */
+ struct callout sc_peers_tmo; /* helo timeout */
+ struct carp_peer_list sc_peers; /* IPv4 peers */
+ struct carp_peer_list sc_peers6 /* IPv6 peers */;
+ int sc_hosts; /* IPv4 hosts in this cluster */
+ int sc_host; /* my IPv4 position */
+ int sc_hosts6; /* IPv6 hosts in this cluster */
+ int sc_host6; /* my IPv6 position */
+ struct mtx sc_peers_mtx; /* mutex for sc_peers */
+ struct mtx sc_peers6_mtx; /* mutex for sc_peers6 */
+ char sc_peers_mtx_name[64]; /* name of sc_peers_mtx */
+ char sc_peers6_mtx_name[64]; /* name of sc_peers6_mtx */
+ /* iplb */
};
#define SC2IFP(sc) ((sc)->sc_ifp)
@@ -354,7 +395,7 @@
static int
carp_clone_create(struct if_clone *ifc, int unit)
{
-
+ struct timeval tv;
struct carp_softc *sc;
struct ifnet *ifp;
@@ -396,6 +437,25 @@
mtx_lock(&carp_mtx);
LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
mtx_unlock(&carp_mtx);
+
+ /* iplb */
+ strcpy(sc->sc_peers_mtx_name, SC2IFP(sc)->if_xname);
+ strcat(sc->sc_peers_mtx_name, "_peers_mtx");
+ mtx_init(&sc->sc_peers_mtx, sc->sc_peers_mtx_name, NULL, MTX_DEF);
+ strcpy(sc->sc_peers6_mtx_name, SC2IFP(sc)->if_xname);
+ strcat(sc->sc_peers6_mtx_name, "_peers6_mtx");
+ mtx_init(&sc->sc_peers6_mtx, sc->sc_peers6_mtx_name, NULL, MTX_DEF);
+ TAILQ_INIT(&sc->sc_peers);
+ TAILQ_INIT(&sc->sc_peers6);
+ sc->sc_hosts = 0;
+ sc->sc_host = -1;
+ sc->sc_hosts6 = 0;
+ sc->sc_host6 = -1;
+ tv.tv_sec = CARP_DFLTINTV;
+ tv.tv_usec = 0;
+ callout_init(&sc->sc_peers_tmo, NET_CALLOUT_MPSAFE);
+ callout_reset(&sc->sc_peers_tmo, tvtohz(&tv), carp_iplb_cleanup, sc);
+
return (0);
}
@@ -403,11 +463,27 @@
carp_clone_destroy(struct ifnet *ifp)
{
struct carp_softc *sc = ifp->if_softc;
+ struct carp_peer *cp, *cp_temp;
if (sc->sc_carpdev)
CARP_SCLOCK(sc);
carpdetach(sc, 1); /* Returns unlocked. */
+ /* iplb */
+ callout_stop(&sc->sc_peers_tmo);
+ mtx_destroy(&sc->sc_peers_mtx);
+ mtx_destroy(&sc->sc_peers6_mtx);
+ sc->sc_host = -1;
+ sc->sc_hosts = 0;
+ sc->sc_host6 = -1;
+ sc->sc_hosts6 = 0;
+ TAILQ_FOREACH_SAFE(cp, &sc->sc_peers, cp_list, cp_temp) {
+ TAILQ_REMOVE(&sc->sc_peers, cp, cp_list);
+ }
+ TAILQ_FOREACH_SAFE(cp, &sc->sc_peers6, cp_list, cp_temp) {
+ TAILQ_REMOVE(&sc->sc_peers6, cp, cp_list);
+ }
+
mtx_lock(&carp_mtx);
LIST_REMOVE(sc, sc_next);
mtx_unlock(&carp_mtx);
@@ -474,6 +550,466 @@
}
}
+/* iplb */
+/*
+ * Log an event concerning one cluster peer.
+ *
+ * sc    - carp interface the peer belongs to; its if_xname prefixes the line
+ * cp    - peer whose address is printed according to cp_sa.sa_family
+ * event - short description of what happened, e.g. "new peer"
+ *
+ * IPv6 addresses are printed as eight colon-separated groups.  Every byte
+ * is zero-padded to two hex digits; without the padding the byte pairs
+ * 0x01,0x23 and 0x12,0x03 would both be printed as "123".
+ * An unknown address family is only reported through CARP_DEBUG.
+ */
+static void
+carp_iplb_logpeer(struct carp_softc *sc, struct carp_peer *cp, const char *event)
+{
+	const char *a = cp->cp_sa.sa_data;
+
+	switch (cp->cp_sa.sa_family) {
+#ifdef INET6
+	case AF_INET6:
+		CARP_LOG("%s: %s: %02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx:"
+		    "%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx:%02hhx%02hhx\n",
+		    SC2IFP(sc)->if_xname, event,
+		    a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7],
+		    a[8], a[9], a[10], a[11], a[12], a[13], a[14], a[15]);
+		break;
+#endif
+#ifdef INET
+	case AF_INET:
+		CARP_LOG("%s: %s: %hhu.%hhu.%hhu.%hhu\n",
+		    SC2IFP(sc)->if_xname, event,
+		    a[0], a[1], a[2], a[3]);
+		break;
+#endif
+	default:
+		CARP_DEBUG("%s: carp_iplb_logpeer: unknown address family %i\n",
+		    SC2IFP(sc)->if_xname, cp->cp_sa.sa_family);
+		break;
+	}
+}
+
+/*
+ * Ordered, byte-wise comparison of two buffers.
+ *
+ * a, b - buffers to compare; both are only read
+ * len  - number of bytes to compare; a value <= 0 compares nothing
+ *
+ * Returns 1 if the first differing byte is greater in a, -1 if it is
+ * greater in b, and 0 if the first len bytes are equal.  Bytes are compared
+ * as unsigned values.  carp_iplb_addpeer() relies on the sign of the result
+ * to keep the peer list sorted.
+ *
+ * The parameters are const void * so that callers can pass char arrays and
+ * void pointers without a pointer-signedness mismatch.
+ */
+static int
+carp_iplb_memcmp(const void *a, const void *b, int len)
+{
+	const unsigned char *pa = a;
+	const unsigned char *pb = b;
+
+	for (; len > 0; ++pa, ++pb, --len) {
+		if (*pa != *pb)
+			return (*pa > *pb ? 1 : -1);
+	}
+	return (0);
+}
+
+/*
+ * Record that the cluster node with the given address is alive.
+ *
+ * sc      - carp interface whose peer list is updated
+ * af      - AF_INET or AF_INET6; selects the peer list, its mutex and the
+ *           address length (4 or 16 bytes)
+ * address - pointer to the raw address bytes of the peer
+ *
+ * A peer that is already listed only gets its last-seen time refreshed.
+ * An unknown peer is inserted so that the list stays in ascending address
+ * order; carp_iplb_cleanup2() uses the list position of the local address
+ * as this node's index in the cluster.
+ *
+ * Does nothing while net.inet.carp.ip_balance is 0 or for an unsupported
+ * address family.
+ *
+ * This runs from the packet input path and from a callout and allocates
+ * while holding the list mutex, so the allocation must not sleep
+ * (M_NOWAIT).  If it fails the peer is simply not recorded this time.
+ */
+static void
+carp_iplb_addpeer(struct carp_softc *sc, int af, void *address)
+{
+	struct carp_peer_list *peers;
+	struct carp_peer *cp, *cp_temp;
+	int ia_len, notfound = 1;
+	struct mtx *mutex;
+
+	if (!carp_iplb_enable)
+		return;
+
+	switch (af) {
+#ifdef INET6
+	case AF_INET6:
+		ia_len = 16;
+		peers = &sc->sc_peers6;
+		mutex = &sc->sc_peers6_mtx;
+		break;
+#endif
+#ifdef INET
+	case AF_INET:
+		ia_len = 4;
+		peers = &sc->sc_peers;
+		mutex = &sc->sc_peers_mtx;
+		break;
+#endif
+	default:
+		CARP_DEBUG("%s: carp_iplb_addpeer: unknown address family %i\n",
+		    SC2IFP(sc)->if_xname, af);
+		return;
+	}
+
+	mtx_lock(mutex);
+
+	/*
+	 * Walk the sorted list until an entry >= address is found.  On exit
+	 * cp_temp is NULL (append at the tail) or points at the entry that
+	 * is equal to (notfound == 0) or greater than (notfound > 0) address.
+	 */
+	TAILQ_FOREACH(cp_temp, peers, cp_list) {
+		if (cp_temp->cp_sa.sa_family != af)
+			continue;
+		/* XXX: memcmp/bcmp doesn't work */
+		notfound = carp_iplb_memcmp(
+		    (unsigned char *)cp_temp->cp_sa.sa_data, address, ia_len);
+		if (notfound >= 0)
+			break;
+	}
+
+	if (!notfound) {
+		/* Known peer: refresh its last-seen time. */
+		getmicrotime(&cp_temp->cp_tv);
+		mtx_unlock(mutex);
+		return;
+	}
+
+	MALLOC(cp, struct carp_peer *, sizeof(*cp),
+	    M_CARP, M_NOWAIT|M_ZERO);
+	if (cp == NULL) {
+		/* Do not leave the list locked on the error path. */
+		mtx_unlock(mutex);
+		CARP_LOG("%s: carp_iplb_addpeer: out of memory!\n",
+		    SC2IFP(sc)->if_xname);
+		return;
+	}
+	memcpy(cp->cp_sa.sa_data, address, ia_len);
+	cp->cp_sa.sa_family = af;
+	getmicrotime(&cp->cp_tv);
+
+	carp_iplb_logpeer(sc, cp, "new peer");
+
+	if (cp_temp)
+		TAILQ_INSERT_BEFORE(cp_temp, cp, cp_list);
+	else
+		TAILQ_INSERT_TAIL(peers, cp, cp_list);
+
+	mtx_unlock(mutex);
+}
+/*
+ * Build a carp header of type CARP_HELO for this vhid and transmit it:
+ * over IPv4 to INADDR_CARP_GROUP when sc->sc_ia is set, and over IPv6 to
+ * the ff02::12 group when sc->sc_ia6 is set.  Receivers feed the source
+ * address of such a packet to carp_iplb_addpeer().
+ *
+ * Does nothing while carp_iplb_enable is 0.  Failures are logged and the
+ * function returns; nothing is reported to the caller.
+ */
+static void
+carp_iplb_send_helo(struct carp_softc *sc)
+{
+ struct carp_header ch;
+ struct carp_header *ch_ptr;
+ struct mbuf *m;
+ struct m_tag *mtag;
+ struct ifnet *ifp = SC2IFP(sc);
+ int len;
+
+ if (!carp_iplb_enable)
+ return;
+
+ ch.carp_version = CARP_VERSION;
+ ch.carp_type = CARP_HELO; /* the input path checks this value to recognise a helo */
+ ch.carp_vhid = sc->sc_vhid;
+ ch.carp_advbase = 0;
+ ch.carp_advskew = 0;
+ ch.carp_authlen = 7; /* XXX DEFINE */
+ ch.carp_pad1 = 0; /* must be zero */
+ ch.carp_cksum = 0;
+ ch.carp_counter[0] = 0;
+ ch.carp_counter[1] = 0;
+
+ carp_hmac_generate(sc, ch.carp_counter, ch.carp_md);
+
+#ifdef INET
+ if (sc->sc_ia) {
+ struct ip *ip;
+
+ MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ if (m == NULL)
+ return; /* NOTE: this also skips the IPv6 helo below */
+ len = sizeof(*ip) + sizeof(ch);
+ m->m_pkthdr.len = len;
+ m->m_pkthdr.rcvif = NULL;
+ m->m_len = len;
+ MH_ALIGN(m, m->m_len);
+ m->m_flags |= M_MCAST;
+ ip = mtod(m, struct ip *);
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = sizeof(*ip) >> 2;
+ ip->ip_tos = IPTOS_LOWDELAY;
+ ip->ip_len = len;
+ ip->ip_id = ip_newid();
+ ip->ip_off = IP_DF;
+ ip->ip_ttl = CARP_DFLTTL;
+ ip->ip_p = IPPROTO_CARP;
+ ip->ip_sum = 0;
+ ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
+ ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
+
+ ch_ptr = (struct carp_header *)(&ip[1]); /* carp header follows the IP header */
+ bcopy(&ch, ch_ptr, sizeof(ch));
+
+ /* Tag packet for carp_output */
+ mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
+ if (mtag == NULL) {
+ CARP_LOG("%s: carp_iplb_send_helo: m_tag_get failed\n",
+ SC2IFP(sc)->if_xname);
+ m_freem(m);
+ return;
+ }
+ bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
+ m_tag_prepend(m, mtag);
+
+ m->m_data += sizeof(*ip); /* step over the IP header so only the carp header is checksummed */
+ ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
+ m->m_data -= sizeof(*ip); /* restore the start of the packet */
+
+#if 0 /* interface and carp statistics are not updated for helo packets */
+ getmicrotime(&SC2IFP(sc)->if_lastchange);
+ SC2IFP(sc)->if_opackets++;
+ SC2IFP(sc)->if_obytes += len;
+ carpstats.carps_opackets++;
+#endif
+
+ if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
+ CARP_LOG("%s: carp_iplb_send_helo: ip_output failed\n",
+ SC2IFP(sc)->if_xname);
+ }
+#endif /* INET */
+#ifdef INET6
+ if (sc->sc_ia6) {
+ struct ip6_hdr *ip6;
+
+ MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ if (m == NULL)
+ return;
+ len = sizeof(*ip6) + sizeof(ch);
+ m->m_pkthdr.len = len;
+ m->m_pkthdr.rcvif = NULL;
+ m->m_len = len;
+ MH_ALIGN(m, m->m_len);
+ m->m_flags |= M_MCAST;
+ ip6 = mtod(m, struct ip6_hdr *);
+ bzero(ip6, sizeof(*ip6));
+ ip6->ip6_vfc |= IPV6_VERSION;
+ ip6->ip6_hlim = CARP_DFLTTL;
+ ip6->ip6_nxt = IPPROTO_CARP;
+ bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
+ sizeof(struct in6_addr));
+ /* set the multicast destination */
+
+ ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
+ ip6->ip6_dst.s6_addr8[15] = 0x12; /* header was zeroed above, so this yields ff02::12 */
+ if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
+ SC2IFP(sc)->if_oerrors++;
+ m_freem(m);
+ CARP_LOG("%s: in6_setscope failed\n", __func__);
+ return;
+ }
+
+ ch_ptr = (struct carp_header *)(&ip6[1]); /* carp header follows the IPv6 header */
+ bcopy(&ch, ch_ptr, sizeof(ch));
+
+ /* Tag packet for carp_output */
+ mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
+ if (mtag == NULL) {
+ CARP_LOG("%s: carp_iplb_send_helo: m_tag_get failed\n",
+ SC2IFP(sc)->if_xname);
+ m_freem(m);
+ return;
+ }
+ bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
+ m_tag_prepend(m, mtag);
+
+ m->m_data += sizeof(*ip6); /* step over the IPv6 header so only the carp header is checksummed */
+ ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
+ m->m_data -= sizeof(*ip6); /* restore the start of the packet */
+
+#if 0 /* interface and carp statistics are not updated for helo packets */
+ getmicrotime(&SC2IFP(sc)->if_lastchange);
+ SC2IFP(sc)->if_opackets++;
+ SC2IFP(sc)->if_obytes += len;
+ carpstats.carps_opackets6++;
+#endif
+
+ if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL))
+ CARP_LOG("%s: carp_iplb_send_helo: ip6_output failed\n",
+ SC2IFP(sc)->if_xname);
+ }
+#endif /* INET6 */
+}
+
+/*
+ * Decide whether this node should accept a frame sent to the carp address.
+ *
+ * m_in - received frame; it is expected to start with the Ethernet header.
+ *        It is never modified: a private copy is made and inspected.
+ * sc   - carp interface the frame is checked against
+ *
+ * Returns non-zero if this node should process the frame:
+ *  - (sc_state == MASTER) when balancing is disabled, m_in is NULL, the
+ *    copy or a header pull-up fails, the frame is neither IPv4 nor IPv6,
+ *    or this node has no valid position in the cluster;
+ *  - for TCP segments other than a pure SYN: whether a matching syncache
+ *    entry or TCP pcb exists on this node;
+ *  - for everything else: whether
+ *    (source >> (32 - ip_balance_mask)) % hosts equals this node's index,
+ *    where source is the IPv4 source address or the last 32 bits of the
+ *    IPv6 source address.
+ *
+ * The caller (carp_forus()) holds the carp lock, so nothing in here may
+ * sleep; the copy is therefore allocated with M_DONTWAIT.
+ *
+ * NOTE(review): in_pcblookup_hash()/in6_pcblookup_hash() are called without
+ * taking the tcbinfo lock here - confirm against the pcb locking rules of
+ * the target branch.
+ * NOTE(review): IPv4 fragments and IPv6 extension headers are not
+ * recognised; the bytes after the IP header are read as a TCP header
+ * whenever the protocol/next-header field says TCP.
+ */
+static int
+carp_iplb_forus(struct mbuf *m_in, struct carp_softc *sc)
+{
+	struct mbuf *m;
+	struct ether_header *eh;
+	int hosts, host, mask;
+	int match = (sc->sc_state == MASTER);
+	uint32_t addr;
+	struct tcphdr *tcp;
+	struct in_conninfo inc;
+	struct syncache_head *sch;
+#ifdef INET
+	struct ip *ip;
+	int hlen;
+#endif
+#ifdef INET6
+	struct ip6_hdr *ip6;
+#endif
+
+	if (!carp_iplb_enable)
+		return (match);
+
+	if (m_in == NULL) {
+		CARP_LOG("%s: carp_iplb_forus: m_in is null\n",
+		    SC2IFP(sc)->if_xname);
+		return (match);
+	}
+
+	if ((m = m_dup(m_in, M_DONTWAIT)) == NULL) {
+		CARP_LOG("%s: carp_iplb_forus: dup failed\n",
+		    SC2IFP(sc)->if_xname);
+		return (match);
+	}
+
+	if ((m = m_pullup(m, ETHER_HDR_LEN)) == NULL)
+		goto PULLUP_FAILED;
+	eh = mtod(m, struct ether_header *);
+
+	switch (ntohs(eh->ether_type)) {
+#ifdef INET
+	case ETHERTYPE_IP:
+		m = m_pullup(m, ETHER_HDR_LEN + sizeof(struct ip));
+		if (m == NULL)
+			goto PULLUP_FAILED;
+		/* m_pullup() may hand back a different mbuf: re-derive. */
+		eh = mtod(m, struct ether_header *);
+		ip = (struct ip *)(eh + 1);
+		addr = ntohl(ip->ip_src.s_addr);
+		hosts = sc->sc_hosts;
+		host = sc->sc_host;
+		if (ip->ip_p != IPPROTO_TCP)
+			break;
+		hlen = ip->ip_hl << 2;
+		if (hlen < (int)sizeof(struct ip))
+			break;	/* bogus header length: balance by address */
+		m = m_pullup(m, ETHER_HDR_LEN + hlen + sizeof(struct tcphdr));
+		if (m == NULL)
+			goto PULLUP_FAILED;
+		eh = mtod(m, struct ether_header *);
+		ip = (struct ip *)(eh + 1);
+
+		tcp = (struct tcphdr *)((int8_t *)ip + hlen);
+		/* A pure SYN opens a new connection: balance by address. */
+		if ((tcp->th_flags & (TH_SYN|TH_ACK)) == TH_SYN)
+			break;
+
+		/* Zero first so fields not set below are deterministic. */
+		bzero(&inc, sizeof(inc));
+		inc.inc_isipv6 = 0;
+		inc.inc_fport = tcp->th_sport;
+		inc.inc_lport = tcp->th_dport;
+		inc.inc_faddr = ip->ip_src;
+		inc.inc_laddr = ip->ip_dst;
+		match = (syncache_lookup(&inc, &sch) != NULL) ||
+		    (in_pcblookup_hash(&tcbinfo,
+		    ip->ip_src, tcp->th_sport,
+		    ip->ip_dst, tcp->th_dport,
+		    0, m->m_pkthdr.rcvif) != NULL);
+		goto RETURN;
+#endif
+#ifdef INET6
+	case ETHERTYPE_IPV6:
+		m = m_pullup(m, ETHER_HDR_LEN + sizeof(struct ip6_hdr));
+		if (m == NULL)
+			goto PULLUP_FAILED;
+		/* m_pullup() may hand back a different mbuf: re-derive. */
+		eh = mtod(m, struct ether_header *);
+		ip6 = (struct ip6_hdr *)(eh + 1);
+		addr = ntohl(ip6->ip6_src.s6_addr32[3]);
+		hosts = sc->sc_hosts6;
+		host = sc->sc_host6;
+		if (ip6->ip6_nxt != IPPROTO_TCP)
+			break;
+		m = m_pullup(m, ETHER_HDR_LEN + sizeof(struct ip6_hdr) +
+		    sizeof(struct tcphdr));
+		if (m == NULL)
+			goto PULLUP_FAILED;
+		eh = mtod(m, struct ether_header *);
+		ip6 = (struct ip6_hdr *)(eh + 1);
+
+		tcp = (struct tcphdr *)((int8_t *)ip6 + sizeof(struct ip6_hdr));
+		/* A pure SYN opens a new connection: balance by address. */
+		if ((tcp->th_flags & (TH_SYN|TH_ACK)) == TH_SYN)
+			break;
+
+		/* Zero first so fields not set below are deterministic. */
+		bzero(&inc, sizeof(inc));
+		inc.inc_isipv6 = 1;
+		inc.inc_fport = tcp->th_sport;
+		inc.inc_lport = tcp->th_dport;
+		inc.inc6_faddr = ip6->ip6_src;
+		inc.inc6_laddr = ip6->ip6_dst;
+		match = (syncache_lookup(&inc, &sch) != NULL) ||
+		    (in6_pcblookup_hash(&tcbinfo,
+		    &ip6->ip6_src, tcp->th_sport,
+		    &ip6->ip6_dst, tcp->th_dport,
+		    0, m->m_pkthdr.rcvif) != NULL);
+		goto RETURN;
+#endif
+	default:
+		goto RETURN;
+	}
+
+	/* No valid position in the cluster (yet): keep the master default. */
+	if ((hosts <= 0) ||
+	    (host < 0) ||
+	    (host >= hosts))
+		goto RETURN;
+
+	/* Read the tunable once; it can be changed through sysctl anytime. */
+	mask = carp_iplb_mask;
+	if ((mask < 0) || (mask > 32)) {
+		CARP_LOG("%s: adjusting ip_balance_mask %i -> 32\n",
+		    SC2IFP(sc)->if_xname, mask);
+		carp_iplb_mask = mask = 32;
+	}
+
+	/* Shifting a 32-bit value by 32 is undefined; mask 0 keeps no bits. */
+	addr = (mask == 0) ? 0 : (addr >> (32 - mask));
+	addr %= hosts;
+	match = (addr == (uint32_t)host);
+
+	goto RETURN;
+
+PULLUP_FAILED:
+	/* m_pullup() already freed the chain; there is nothing to release. */
+	CARP_LOG("%s: carp_iplb_forus: pullup failed\n", SC2IFP(sc)->if_xname);
+	return (match);
+RETURN:
+	m_freem(m);
+	return (match);
+}
+
+/*
+ * Expire stale peers from one peer list and recompute this node's
+ * position in the cluster.
+ *
+ * sc       - carp interface the list belongs to
+ * peers    - peer list to scan (sc_peers or sc_peers6)
+ * address  - raw bytes of the local address for this family
+ * sc_hosts - in/out: number of live peers; updated and logged on change
+ * sc_host  - in/out: list index of the local address, -1 if it is not
+ *            listed; updated and logged on change
+ * ia_len   - number of address bytes compared against `address`
+ * ip_ver   - IP version number, used in log messages only
+ * mutex    - mutex protecting `peers`; taken for the whole scan
+ *
+ * A peer is dropped when it has not been seen for more than
+ * 3 * sc_advbase seconds.  Dropped entries are unlinked and freed; they
+ * were allocated from M_CARP by carp_iplb_addpeer().
+ */
+static void
+carp_iplb_cleanup2(struct carp_softc *sc, struct carp_peer_list *peers,
+    void *address, int *sc_hosts, int *sc_host,
+    int ia_len, int ip_ver, struct mtx *mutex)
+{
+	struct carp_peer *cp, *cp_temp;
+	struct timeval tv;
+	int hosts = 0, host = -1;
+
+	getmicrotime(&tv);
+
+	mtx_lock(mutex);
+	TAILQ_FOREACH_SAFE(cp, peers, cp_list, cp_temp) {
+		if (cp->cp_tv.tv_sec + sc->sc_advbase * 3 < tv.tv_sec) {
+			carp_iplb_logpeer(sc, cp, "vanished peer");
+			TAILQ_REMOVE(peers, cp, cp_list);
+			/* Unlinking alone would leak the entry. */
+			free(cp, M_CARP);
+			continue;
+		}
+		/* Live peer: it occupies index `hosts` in the list. */
+		if (bcmp(address, cp->cp_sa.sa_data, ia_len) == 0)
+			host = hosts;
+		++hosts;
+	}
+	if (hosts != *sc_hosts) {
+		CARP_LOG("%s: IPv%i hosts %i -> %i\n",
+		    SC2IFP(sc)->if_xname, ip_ver, *sc_hosts, hosts);
+		*sc_hosts = hosts;
+	}
+	if (host != *sc_host) {
+		CARP_LOG("%s: IPv%i host %i -> %i\n",
+		    SC2IFP(sc)->if_xname, ip_ver, *sc_host, host);
+		*sc_host = host;
+	}
+	mtx_unlock(mutex);
+}
+
+/*
+ * Periodic callout: refresh this node's own peer entry, expire peers
+ * that stopped sending helo packets, and re-arm the callout.
+ *
+ * v - the struct carp_softc the callout was armed with
+ *
+ * For each address family that has an address configured (sc_ia, sc_ia6)
+ * the local address is first (re)registered as a peer, then the list is
+ * cleaned and the host count and index are recomputed.  The callout is
+ * rescheduled to run again after sc_advbase seconds.
+ *
+ * carp_iplb_cleanup2() takes (..., ia_len, ip_ver, ...): the address
+ * length in bytes comes first, the IP version second.
+ */
+static void
+carp_iplb_cleanup(void *v)
+{
+	struct carp_softc *sc = v;
+	void *address;
+	struct timeval tv;
+
+#ifdef INET
+	if (sc->sc_ia) {
+		address = &sc->sc_ia->ia_addr.sin_addr;
+		carp_iplb_addpeer(sc, AF_INET, address);
+		carp_iplb_cleanup2(sc, &sc->sc_peers, address, &sc->sc_hosts,
+		    &sc->sc_host, 4 /* ia_len */, 4 /* ip_ver */,
+		    &sc->sc_peers_mtx);
+	}
+#endif
+#ifdef INET6
+	if (sc->sc_ia6) {
+		address = &sc->sc_ia6->ia_addr.sin6_addr;
+		carp_iplb_addpeer(sc, AF_INET6, address);
+		/* 16 address bytes, IP version 6 (these were swapped). */
+		carp_iplb_cleanup2(sc, &sc->sc_peers6, address, &sc->sc_hosts6,
+		    &sc->sc_host6, 16 /* ia_len */, 6 /* ip_ver */,
+		    &sc->sc_peers6_mtx);
+	}
+#endif
+
+	tv.tv_sec = sc->sc_advbase;
+	tv.tv_usec = 0;
+	callout_reset(&sc->sc_peers_tmo, tvtohz(&tv), carp_iplb_cleanup, sc);
+}
+/* iplb */
+
/*
* process input packet.
* we have rearranged checks order compared to the rfc,
@@ -689,6 +1225,34 @@
return;
}
+ /* iplb: check helo */
+ if (ch->carp_type == CARP_HELO) {
+ void *v;
+ switch (af) {
+#ifdef INET6
+ case AF_INET6:
+ v = mtod(m, struct ip6_hdr *);
+ carp_iplb_addpeer(sc, af,
+ &((struct ip6_hdr*) v)->ip6_src);
+ break;
+#endif
+#ifdef INET
+ case AF_INET:
+ v = mtod(m, struct ip *);
+ carp_iplb_addpeer(sc, af, &((struct ip*) v)->ip_src);
+ break;
+#endif
+ default:
+ CARP_LOG("%s, carp_input_c: unknown address family %i\n",
+ SC2IFP(sc)->if_xname, af);
+ break;
+ }
+
+ CARP_UNLOCK(ifp->if_carp);
+ m_freem(m);
+ return;
+ }
+
tmp_counter = ntohl(ch->carp_counter[0]);
tmp_counter = tmp_counter<<32;
tmp_counter += ntohl(ch->carp_counter[1]);
@@ -839,6 +1403,8 @@
CARP_SCLOCK_ASSERT(sc);
+ carp_iplb_send_helo(sc);
+
/* bow out if we've lost our UPness or RUNNINGuiness */
if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
(SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
@@ -1224,7 +1790,7 @@
#endif
struct ifnet *
-carp_forus(void *v, void *dhost)
+carp_forus(void *v, void *dhost, struct mbuf *m)
{
struct carp_if *cif = v;
struct carp_softc *vh;
@@ -1237,7 +1803,8 @@
TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
if ((SC2IFP(vh)->if_flags & IFF_UP) &&
(SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
- vh->sc_state == MASTER &&
+ // (vh->sc_state == MASTER) &&
+ carp_iplb_forus(m, vh) &&
!bcmp(dhost, IFP2ENADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
CARP_UNLOCK(cif);
return (SC2IFP(vh));
@@ -1353,6 +1920,9 @@
carp_master_down, sc);
break;
}
+
+ carp_iplb_send_helo(sc);
+
break;
case MASTER:
tv.tv_sec = sc->sc_advbase;
diff -dru sys0/netinet/ip_carp.h sys/netinet/ip_carp.h
--- sys0/netinet/ip_carp.h 2006-08-10 10:10:12.000000000 +0000
+++ sys/netinet/ip_carp.h 2008-01-23 23:28:02.000000000 +0000
@@ -157,6 +157,6 @@
u_int8_t **);
struct ifaddr *carp_iamatch6(void *, struct in6_addr *);
void *carp_macmatch6(void *, struct mbuf *, const struct in6_addr *);
-struct ifnet *carp_forus (void *, void *);
+struct ifnet *carp_forus (void *, void *, struct mbuf *);
#endif
#endif /* _IP_CARP_H */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?C3BE92AA.BF4E1%fjo-lists>
