Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 8 Oct 2011 05:41:19 +0000 (UTC)
From:      Lawrence Stewart <lstewart@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r226137 - in projects/diffused_head/sys: modules/diffuse modules/diffuse/diffuse_classifier_nbayes netinet/ipfw
Message-ID:  <201110080541.p985fJKS044811@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: lstewart
Date: Sat Oct  8 05:41:19 2011
New Revision: 226137
URL: http://svn.freebsd.org/changeset/base/226137

Log:
  A fixed-point math based naive-bayes classifier implementation in the form of a
  DIFFUSE classifier kernel module. It classifies using discretised feature
  values, and has been empirically shown to perform classifications quickly and
  with high accuracy.
  
  Sponsored by:	FreeBSD Foundation
  Reviewed by:	bz

Added:
  projects/diffused_head/sys/modules/diffuse/diffuse_classifier_nbayes/
  projects/diffused_head/sys/modules/diffuse/diffuse_classifier_nbayes/Makefile   (contents, props changed)
  projects/diffused_head/sys/netinet/ipfw/diffuse_classifier_nbayes.c   (contents, props changed)
  projects/diffused_head/sys/netinet/ipfw/diffuse_classifier_nbayes.h   (contents, props changed)
Modified:
  projects/diffused_head/sys/modules/diffuse/Makefile

Modified: projects/diffused_head/sys/modules/diffuse/Makefile
==============================================================================
--- projects/diffused_head/sys/modules/diffuse/Makefile	Sat Oct  8 05:28:17 2011	(r226136)
+++ projects/diffused_head/sys/modules/diffuse/Makefile	Sat Oct  8 05:41:19 2011	(r226137)
@@ -2,6 +2,7 @@
 
 SUBDIR=	diffuse \
 	diffuse_classifier_c45 \
+	diffuse_classifier_nbayes \
 	diffuse_feature_iat \
 	diffuse_feature_iatbd \
 	diffuse_feature_pcnt \

Added: projects/diffused_head/sys/modules/diffuse/diffuse_classifier_nbayes/Makefile
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ projects/diffused_head/sys/modules/diffuse/diffuse_classifier_nbayes/Makefile	Sat Oct  8 05:41:19 2011	(r226137)
@@ -0,0 +1,8 @@
+# $FreeBSD$
+
+.PATH:	${.CURDIR}/../../../netinet/ipfw
+
+KMOD=	diffuse_classifier_nbayes
+SRCS=	diffuse_classifier_nbayes.c
+
+.include <bsd.kmod.mk>

Added: projects/diffused_head/sys/netinet/ipfw/diffuse_classifier_nbayes.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ projects/diffused_head/sys/netinet/ipfw/diffuse_classifier_nbayes.c	Sat Oct  8 05:41:19 2011	(r226137)
@@ -0,0 +1,302 @@
+/*-
+ * Copyright (c) 2010-2011
+ * 	Swinburne University of Technology, Melbourne, Australia.
+ * All rights reserved.
+ *
+ * This software was developed at the Centre for Advanced Internet
+ * Architectures, Swinburne University of Technology, by Sebastian Zander, made
+ * possible in part by a gift from The Cisco University Research Program Fund, a
+ * corporate advised fund of Silicon Valley Community Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * DIFFUSE Naive Bayes classifier.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/mbuf.h>
+#else
+#include <sys/types.h>
+#include <stdio.h>
+#endif /* _KERNEL */
+#include <sys/socket.h>
+
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip_diffuse.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <netinet/tcp.h>
+
+#include <netinet/ipfw/diffuse_common.h>
+#include <netinet/ipfw/diffuse_classifier.h>
+#include <netinet/ipfw/diffuse_classifier_nbayes.h>
+#ifdef _KERNEL
+#include <netinet/ipfw/diffuse_classifier_module.h>
+#include <netinet/ipfw/diffuse_private.h>
+#endif
+
+#ifdef _KERNEL
+
+/* Computes the buffer size needed to store classifier. */
+static int
+get_size(struct di_classifier_nbayes_config *c)
+{
+
+	return (sizeof(struct di_classifier_nbayes_config) + c->fdist_len);
+}
+
+/* Copy, assumes target memory is allocated. */
+static void
+cpy_conf(struct di_classifier_nbayes_config *f,
+    struct di_classifier_nbayes_config *t)
+{
+
+	t->oid = f->oid;
+	strcpy(t->model_name, f->model_name);
+	t->feature_cnt = f->feature_cnt;
+	t->class_cnt = f->class_cnt;
+	t->multi = f->multi;
+	t->fdist_len = f->fdist_len;
+	memcpy(t->fdist, f->fdist, f->fdist_len);
+}
+
+static int
+nbayes_init_instance(struct di_cdata *cdata, struct di_oid *params)
+{
+	struct di_classifier_nbayes_config *c, *conf;
+
+	c = (struct di_classifier_nbayes_config *)params;
+
+	DID("class cnt %d", c->class_cnt);
+	DID("attr cnt %d", c->feature_cnt);
+	DID("multi %d", (1 << c->multi));
+	DID("fdist_len %d", c->fdist_len);
+	DID("want size %d", get_size(c));
+
+	cdata->conf = malloc(get_size(c), M_DIFFUSE, M_NOWAIT | M_ZERO);
+	if (cdata->conf == NULL)
+		return (ENOMEM);
+
+	conf = (struct di_classifier_nbayes_config *)cdata->conf;
+	cpy_conf(c, conf);
+
+	return (0);
+}
+
+static int
+nbayes_destroy_instance(struct di_cdata *cdata)
+{
+
+	free(cdata->conf, M_DIFFUSE);
+
+	return (0);
+}
+
+static int
+nbayes_get_conf(struct di_cdata *cdata, struct di_oid *cbuf, int size_only)
+{
+	int len;
+
+	len = get_size((struct di_classifier_nbayes_config *)cdata->conf);
+
+	if (!size_only)
+		cpy_conf((struct di_classifier_nbayes_config *)cdata->conf,
+		    (struct di_classifier_nbayes_config *)cbuf);
+
+	return (len);
+}
+
+#endif /* _KERNEL */
+
+#define	LINEAR_SEARCH_THRESHOLD 5
+
+static inline struct di_nbayes_attr_disc_val *
+find_val(struct di_nbayes_attr_disc_val *first, int val_cnt, int class_cnt,
+    int32_t fval)
+{
+	struct di_nbayes_attr_disc_val *val;
+	int h, l, j, m;
+
+	val = NULL;
+
+	if (val_cnt <= LINEAR_SEARCH_THRESHOLD) {
+		/* Linear search. */
+		for (j = 0; j < val_cnt; j++) {
+			val = (struct di_nbayes_attr_disc_val *)
+			    (((char *)first) +
+			    (sizeof(struct di_nbayes_attr_disc_val) +
+			    class_cnt * sizeof(uint32_t)) * j);
+			if (fval <= val->high_val)
+				break;
+		}
+
+	} else {
+		/* Binary search. */
+		l = 0;
+		h = val_cnt - 1;
+		while (l < h) {
+			m = l + ((h - l) / 2);
+			val = (struct di_nbayes_attr_disc_val *)
+			    (((char *)first) +
+			    (sizeof(struct di_nbayes_attr_disc_val) +
+			    class_cnt * sizeof(uint32_t)) * m);
+
+			if (val->high_val < fval)
+				l = m + 1;
+			else
+				h = m;
+		}
+
+		val = (struct di_nbayes_attr_disc_val *)(((char *)first) +
+		    (sizeof(struct di_nbayes_attr_disc_val) + class_cnt *
+		    sizeof(uint32_t)) * h);
+	}
+
+	return (val);
+}
+
+int
+nbayes_classify(struct di_cdata *cdata, int32_t *features, int fcnt)
+{
+	struct di_classifier_nbayes_config *conf =
+	    (struct di_classifier_nbayes_config *)cdata->conf;
+	struct di_nbayes_attr_prior *ap;
+	struct di_nbayes_attr_disc *ad;
+	struct di_nbayes_attr_disc_val *val;
+	struct di_nbayes_attr_id *attr;
+	char *d;
+	uint64_t probs[conf->class_cnt];
+	uint64_t max_prob;
+	int divs[conf->class_cnt];
+	int best_class, divs_max, f, i, l, len;
+
+	max_prob = 0;
+	best_class = divs_max = 0;
+
+#ifdef DIFFUSE_DEBUG2
+	printf("DIFFUSE: %-10s features ", __func__);
+	for (i = 0; i < fcnt; i++)
+		printf("%u ", features[i]);
+	printf("\n");
+#endif
+
+	for (i = 0; i < conf->class_cnt; i++) {
+		divs[i] = 0;
+
+		for (l = conf->fdist_len, d = (char *)conf->fdist, f = -1;
+		    l > 0; l -= len, d += len, f++) {
+			attr = (struct di_nbayes_attr_id *)d;
+			len = attr->len;
+
+			DID("type %d(%d)", attr->type, attr->len);
+
+			if (f >= fcnt)
+				return (-1); /* Should never happen. */
+
+			if (attr->type == DI_NBAYES_ATTR_PRIOR) {
+				ap = (struct di_nbayes_attr_prior *)attr;
+				probs[i] = ap->prior_p[i];
+			} else if (attr->type == DI_NBAYES_ATTR_DISC) {
+				ad = (struct di_nbayes_attr_disc *)attr;
+
+				val = find_val(ad->val, ad->val_cnt,
+				    conf->class_cnt, features[f]);
+				probs[i] *= val->cond_p[i];
+
+				if (probs[i] >
+				    ((uint64_t)1 << (64 - conf->multi))) {
+					/*
+					 * Divide by 2^(conf->multi - 1) to avoid
+					 * overflow.
+					 */
+					probs[i] = fixp_div(probs[i], conf->multi);
+					divs[i]++;
+					if (divs[i] > divs_max)
+						divs_max = divs[i];
+				}
+			} else if (attr->type == DI_NBAYES_ATTR_NORM) {
+				return (-1); /* XXX: Not supported yet. */
+			}
+		}
+	}
+
+	/* Make sure the divisor is the same for all probs and find max prob. */
+	for (i = 0; i < conf->class_cnt; i++) {
+		probs[i] = fixp_div(probs[i], conf->multi *
+		    (divs_max - divs[i]) - (divs_max - divs[i] - 1));
+
+		DID2("class %u prob %llu %i", i, probs[i], divs[i]);
+
+		if (probs[i] > max_prob) {
+			max_prob = probs[i];
+			best_class = i;
+		}
+	}
+
+	return (best_class);
+}
+
+#ifdef _KERNEL
+
+static int
+nbayes_get_feature_cnt(struct di_cdata *cdata)
+{
+
+	return (((struct di_classifier_nbayes_config *)
+	    cdata->conf)->feature_cnt);
+}
+
+static int
+nbayes_get_class_cnt(struct di_cdata *cdata)
+{
+
+	return (((struct di_classifier_nbayes_config *)cdata->conf)->class_cnt);
+}
+
+static struct di_classifier_alg di_nbayes_desc = {
+	_FI( .name = )			"nbayes",
+	_FI( .ref_count = )		0,
+
+	_FI( .init_instance = )		nbayes_init_instance,
+	_FI( .destroy_instance = )	nbayes_destroy_instance,
+	_FI( .get_conf = )		nbayes_get_conf,
+	_FI( .classify = )		nbayes_classify,
+	_FI( .get_feature_cnt = )	nbayes_get_feature_cnt,
+	_FI( .get_class_cnt = )		nbayes_get_class_cnt,
+};
+
+DECLARE_DIFFUSE_CLASSIFIER_MODULE(nbayes, &di_nbayes_desc);
+
+#endif

Added: projects/diffused_head/sys/netinet/ipfw/diffuse_classifier_nbayes.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ projects/diffused_head/sys/netinet/ipfw/diffuse_classifier_nbayes.h	Sat Oct  8 05:41:19 2011	(r226137)
@@ -0,0 +1,115 @@
+/*-
+ * Copyright (c) 2010-2011
+ * 	Swinburne University of Technology, Melbourne, Australia.
+ * All rights reserved.
+ *
+ * This software was developed at the Centre for Advanced Internet
+ * Architectures, Swinburne University of Technology, by Sebastian Zander, made
+ * possible in part by a gift from The Cisco University Research Program Fund, a
+ * corporate advised fund of Silicon Valley Community Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * DIFFUSE Naive Bayes classifier.
+ */
+
+#ifndef _NETINET_IPFW_DIFFUSE_CLASSIFIER_NBAYES_H_
+#define _NETINET_IPFW_DIFFUSE_CLASSIFIER_NBAYES_H_
+
+#define	DI_NBAYES_ATTR_PRIOR	0
+#define	DI_NBAYES_ATTR_DISC	1
+#define	DI_NBAYES_ATTR_NORM	2
+
+struct di_nbayes_attr_id
+{
+	uint16_t	type;
+	uint16_t	len;
+};
+
+struct di_nbayes_attr_disc_val
+{
+	int32_t		high_val;
+	uint32_t	cond_p[];
+};
+
+struct di_nbayes_attr_disc
+{
+	struct di_nbayes_attr_id	id;
+	/* Number of values/intervals. */
+	uint32_t			val_cnt;
+	/* Interval values and conditional probs. */
+	struct di_nbayes_attr_disc_val	val[];
+};
+
+struct di_nbayes_attr_norm_class
+{
+	int32_t		mean;
+	uint32_t	stddev;
+	int32_t		wsum;
+	uint32_t	prec;
+};
+
+/* One per real attribute. */
+struct di_nbayes_attr_norm
+{
+	struct di_nbayes_attr_id		id;
+	/* class_cnt structs. */
+	struct di_nbayes_attr_norm_class	class[];
+};
+
+/* Priors, not really an attribute. */
+struct di_nbayes_attr_prior
+{
+	struct di_nbayes_attr_id	id;
+	/* class_cnt prior probs. */
+	uint32_t			prior_p[];
+};
+
+struct di_classifier_nbayes_config
+{
+	struct di_oid			oid;
+
+	/* Model name. */
+	char				model_name[DI_MAX_MODEL_STR_LEN];
+	/* Number of features. */
+	uint16_t			feature_cnt;
+	/* Number of classes. */
+	uint16_t			class_cnt;
+	/* Precision, multiplier for double->int. */
+	uint16_t			multi;
+	/* Length of fdists. */
+	uint16_t			fdist_len;
+	/* feature_cnt feature distributions. */
+	struct di_nbayes_attr_id	fdist[];
+};
+
+struct di_classifier_module * nbayes_module(void);
+
+struct di_cdata;
+
+int nbayes_classify(struct di_cdata *cdata, int32_t *features, int fcnt);
+
+#endif /* _NETINET_IPFW_DIFFUSE_CLASSIFIER_NBAYES_H_ */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201110080541.p985fJKS044811>