From owner-svn-src-head@FreeBSD.ORG Thu Mar 11 22:42:34 2010 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 3831A106564A; Thu, 11 Mar 2010 22:42:34 +0000 (UTC) (envelope-from luigi@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 25F478FC17; Thu, 11 Mar 2010 22:42:34 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o2BMgYCQ016791; Thu, 11 Mar 2010 22:42:34 GMT (envelope-from luigi@svn.freebsd.org) Received: (from luigi@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o2BMgY9a016787; Thu, 11 Mar 2010 22:42:34 GMT (envelope-from luigi@svn.freebsd.org) Message-Id: <201003112242.o2BMgY9a016787@svn.freebsd.org> From: Luigi Rizzo Date: Thu, 11 Mar 2010 22:42:34 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r205050 - in head: sbin/ipfw sys/netinet/ipfw X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 11 Mar 2010 22:42:34 -0000 Author: luigi Date: Thu Mar 11 22:42:33 2010 New Revision: 205050 URL: http://svn.freebsd.org/changeset/base/205050 Log: implement listing of a subset of pipes/queues/schedulers. The filtering of the output is done in the kernel instead of userland to reduce the amount of data transfered. Modified: head/sbin/ipfw/dummynet.c head/sys/netinet/ipfw/ip_dn_private.h head/sys/netinet/ipfw/ip_dummynet.c Modified: head/sbin/ipfw/dummynet.c ============================================================================== --- head/sbin/ipfw/dummynet.c Thu Mar 11 22:29:45 2010 (r205049) +++ head/sbin/ipfw/dummynet.c Thu Mar 11 22:42:33 2010 (r205050) @@ -1234,53 +1234,142 @@ dummynet_flush(void) do_cmd(IP_DUMMYNET3, &oid, oid.len); } +/* Parse input for 'ipfw [pipe|sched|queue] show [range list]' + * Returns the number of ranges, and possibly stores them + * in the array v of size len. + */ +static int +parse_range(int ac, char *av[], uint32_t *v, int len) +{ + int n = 0; + char *endptr, *s; + uint32_t base[2]; + + if (v == NULL || len < 2) { + v = base; + len = 2; + } + + for (s = *av; s != NULL; av++, ac--) { + v[0] = strtoul(s, &endptr, 10); + v[1] = (*endptr != '-') ? v[0] : + strtoul(endptr+1, &endptr, 10); + if (*endptr == '\0') { /* prepare for next round */ + s = (ac > 0) ? *(av+1) : NULL; + } else { + if (*endptr != ',') { + warn("invalid number: %s", s); + s = ++endptr; + continue; + } + /* continue processing from here */ + s = ++endptr; + ac++; + av--; + } + if (v[1] < v[0] || + v[1] < 0 || v[1] >= DN_MAX_ID-1 || + v[0] < 0 || v[1] >= DN_MAX_ID-1) { + continue; /* invalid entry */ + } + n++; + /* translate if 'pipe list' */ + if (co.do_pipe == 1) { + v[0] += DN_MAX_ID; + v[1] += DN_MAX_ID; + } + v = (n*2 < len) ? v + 2 : base; + } + return n; +} + /* main entry point for dummynet list functions. co.do_pipe indicates * which function we want to support. - * XXX todo- accept filtering arguments. + * av may contain filtering arguments, either individual entries + * or ranges, or lists (space or commas are valid separators). + * Format for a range can be n1-n2 or n3 n4 n5 ... + * In a range n1 must be <= n2, otherwise the range is ignored. + * A number 'n4' is translate in a range 'n4-n4' + * All number must be > 0 and < DN_MAX_ID-1 */ void dummynet_list(int ac, char *av[], int show_counters) { - struct dn_id oid, *x = NULL; - int ret, i, l = sizeof(oid); + struct dn_id *oid, *x = NULL; + int ret, i, l; + int n; /* # of ranges */ + int buflen; + int max_size; /* largest obj passed up */ + + ac--; + av++; /* skip 'list' | 'show' word */ + + n = parse_range(ac, av, NULL, 0); /* Count # of ranges. */ + + /* Allocate space to store ranges */ + l = sizeof(*oid) + sizeof(uint32_t) * n * 2; + oid = safe_calloc(1, l); + oid_fill(oid, l, DN_CMD_GET, DN_API_VERSION); + + if (n > 0) /* store ranges in idx */ + parse_range(ac, av, (uint32_t *)(oid + 1), n*2); + /* + * Compute the size of the largest object returned. If the + * response leaves at least this much spare space in the + * buffer, then surely the response is complete; otherwise + * there might be a risk of truncation and we will need to + * retry with a larger buffer. + * XXX don't bother with smaller structs. + */ + max_size = sizeof(struct dn_fs); + if (max_size < sizeof(struct dn_sch)) + max_size = sizeof(struct dn_sch); + if (max_size < sizeof(struct dn_flow)) + max_size = sizeof(struct dn_flow); - oid_fill(&oid, l, DN_CMD_GET, DN_API_VERSION); switch (co.do_pipe) { case 1: - oid.subtype = DN_LINK; /* list pipe */ + oid->subtype = DN_LINK; /* list pipe */ break; case 2: - oid.subtype = DN_FS; /* list queue */ + oid->subtype = DN_FS; /* list queue */ break; case 3: - oid.subtype = DN_SCH; /* list sched */ + oid->subtype = DN_SCH; /* list sched */ break; } - /* Request the buffer size (in oid.id)*/ - ret = do_cmd(-IP_DUMMYNET3, &oid, (uintptr_t)&l); - // printf("%s returns %d need %d\n", __FUNCTION__, ret, oid.id); - if (ret != 0 || oid.id <= sizeof(oid)) - return; - - /* Try max 10 times - * Buffer is correct if l != 0. - * If l == 0 no buffer is sent, maybe because kernel requires - * a greater buffer, so try with the new size in x->id. + /* + * Ask the kernel an estimate of the required space (result + * in oid.id), unless we are requesting a subset of objects, + * in which case the kernel does not give an exact answer. + * In any case, space might grow in the meantime due to the + * creation of new queues, so we must be prepared to retry. */ - for (i = 0, l = oid.id; i < 10; i++, l = x->id) { + if (n > 0) { + buflen = 4*1024; + } else { + ret = do_cmd(-IP_DUMMYNET3, oid, (uintptr_t)&l); + if (ret != 0 || oid->id <= sizeof(*oid)) + goto done; + buflen = oid->id + max_size; + oid->len = sizeof(*oid); /* restore */ + } + /* Try a few times, until the buffer fits */ + for (i = 0; i < 20; i++) { + l = buflen; x = safe_realloc(x, l); - *x = oid; - ret = do_cmd(-IP_DUMMYNET3, x, (uintptr_t)&l); - - if (ret != 0 || x->id <= sizeof(oid)) - return; - - if (l != 0) + bcopy(oid, x, oid->len); + ret = do_cmd(-IP_DUMMYNET3, x, (uintptr_t)&l); + if (ret != 0 || x->id <= sizeof(*oid)) + goto done; /* no response */ + if (l + max_size <= buflen) break; /* ok */ + buflen *= 2; /* double for next attempt */ } - // printf("%s returns %d need %d\n", __FUNCTION__, ret, oid.id); - // XXX filter on ac, av list_pipes(x, O_NEXT(x, l)); - free(x); +done: + if (x) + free(x); + free(oid); } Modified: head/sys/netinet/ipfw/ip_dn_private.h ============================================================================== --- head/sys/netinet/ipfw/ip_dn_private.h Thu Mar 11 22:29:45 2010 (r205049) +++ head/sys/netinet/ipfw/ip_dn_private.h Thu Mar 11 22:42:33 2010 (r205050) @@ -359,13 +359,24 @@ struct dn_queue *ipdn_q_find(struct dn_f struct ipfw_flow_id *); struct dn_sch_inst *ipdn_si_find(struct dn_schk *, struct ipfw_flow_id *); -/* helper structure to copy objects returned to userland */ +/* + * copy_range is a template for requests for ranges of pipes/queues/scheds. + * The number of ranges is variable and can be derived by o.len. + * As a default, we use a small number of entries so that the struct + * fits easily on the stack and is sufficient for most common requests. + */ +#define DEFAULT_RANGES 5 +struct copy_range { + struct dn_id o; + uint32_t r[ 2 * DEFAULT_RANGES ]; +}; + struct copy_args { char **start; char *end; int flags; int type; - int extra; /* extra filtering */ + struct copy_range *extra; /* extra filtering */ }; struct sockopt; Modified: head/sys/netinet/ipfw/ip_dummynet.c ============================================================================== --- head/sys/netinet/ipfw/ip_dummynet.c Thu Mar 11 22:29:45 2010 (r205049) +++ head/sys/netinet/ipfw/ip_dummynet.c Thu Mar 11 22:42:33 2010 (r205050) @@ -787,7 +787,7 @@ copy_obj(char **start, char *end, void * int have = end - *start; if (have < o->len || o->len == 0 || o->type == 0) { - D("ERROR type %d %s %d have %d need %d", + D("(WARN) type %d %s %d have %d need %d", o->type, msg, i, have, o->len); return 1; } @@ -954,43 +954,64 @@ static int copy_data_helper(void *_o, void *_arg) { struct copy_args *a = _arg; + uint32_t *r = a->extra->r; /* start of first range */ + uint32_t *lim; /* first invalid pointer */ + int n; - if (a->type == DN_LINK || /* pipe show */ - a->type == DN_SCH) { /* sched show */ - struct dn_schk *s = _o; /* we get only schedulers */ - if (a->type == DN_SCH && s->sch.sched_nr >= DN_MAX_ID) - return 0; /* not valid scheduler */ - if (a->type == DN_LINK && s->sch.sched_nr <= DN_MAX_ID) - return 0; /* not valid pipe */ - if (a->flags & DN_C_LINK) { - if (copy_obj(a->start, a->end, &s->link, - "link", s->sch.sched_nr)) - return DNHT_SCAN_END; - if (copy_profile(a, s->profile)) - return DNHT_SCAN_END; - if (copy_flowset(a, s->fs, 0)) - return DNHT_SCAN_END; - } - if (a->flags & DN_C_SCH) { - if (copy_obj(a->start, a->end, &s->sch, - "sched", s->sch.sched_nr)) - return DNHT_SCAN_END; + lim = (uint32_t *)((char *)(a->extra) + a->extra->o.len); - /* list all attached flowsets */ - if (copy_fsk_list(a, s, 0)) - return DNHT_SCAN_END; - } - if (a->flags & DN_C_FLOW) { - copy_si(a, s, 0); + if (a->type == DN_LINK || a->type == DN_SCH) { + /* pipe|sched show, we receive a dn_schk */ + struct dn_schk *s = _o; + + n = s->sch.sched_nr; + if (a->type == DN_SCH && n >= DN_MAX_ID) + return 0; /* not a scheduler */ + if (a->type == DN_LINK && n <= DN_MAX_ID) + return 0; /* not a pipe */ + + /* see if the object is within one of our ranges */ + for (;r < lim; r += 2) { + if (n < r[0] || n > r[1]) + continue; + /* Found a valid entry, copy and we are done */ + if (a->flags & DN_C_LINK) { + if (copy_obj(a->start, a->end, + &s->link, "link", n)) + return DNHT_SCAN_END; + if (copy_profile(a, s->profile)) + return DNHT_SCAN_END; + if (copy_flowset(a, s->fs, 0)) + return DNHT_SCAN_END; + } + if (a->flags & DN_C_SCH) { + if (copy_obj(a->start, a->end, + &s->sch, "sched", n)) + return DNHT_SCAN_END; + /* list all attached flowsets */ + if (copy_fsk_list(a, s, 0)) + return DNHT_SCAN_END; + } + if (a->flags & DN_C_FLOW) + copy_si(a, s, 0); + break; } - } - if (a->type == DN_FS) { /* queue show, skip internal flowsets */ + } else if (a->type == DN_FS) { + /* queue show, skip internal flowsets */ struct dn_fsk *fs = _o; - if (fs->fs.fs_nr >= DN_MAX_ID) + + n = fs->fs.fs_nr; + if (n >= DN_MAX_ID) return 0; - if (copy_flowset(a, fs, 0)) - return DNHT_SCAN_END; - copy_q(a, fs, 0); + /* see if the object is within one of our ranges */ + for (;r < lim; r += 2) { + if (n < r[0] || n > r[1]) + continue; + if (copy_flowset(a, fs, 0)) + return DNHT_SCAN_END; + copy_q(a, fs, 0); + break; /* we are done */ + } } return 0; } @@ -1690,7 +1711,7 @@ do_config(void *p, int l) } static int -compute_space(struct dn_id *cmd, int *to_copy) +compute_space(struct dn_id *cmd, struct copy_args *a) { int x = 0, need = 0; int profile_size = sizeof(struct dn_profile) - @@ -1746,7 +1767,7 @@ compute_space(struct dn_id *cmd, int *to need = dn_compat_calc_size(dn_cfg); break; } - *to_copy = x; + a->flags = x; if (x & DN_C_SCH) { need += dn_cfg.schk_count * sizeof(struct dn_sch) / 2; /* NOT also, each fs might be attached to a sched */ @@ -1775,61 +1796,105 @@ dummynet_get(struct sockopt *sopt, void int have, i, need, error; char *start = NULL, *buf; size_t sopt_valsize; - struct dn_id cmd; + struct dn_id *cmd; struct copy_args a; + struct copy_range r; + int l = sizeof(struct dn_id); + + bzero(&a, sizeof(a)); + bzero(&r, sizeof(r)); /* save and restore original sopt_valsize around copyin */ sopt_valsize = sopt->sopt_valsize; + + cmd = &r.o; + if (!compat) { - error = sooptcopyin(sopt, &cmd, sizeof(cmd), sizeof(cmd)); - if (error) - return error; + /* copy at least an oid, and possibly a full object */ + error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd)); sopt->sopt_valsize = sopt_valsize; + if (error) + goto done; + l = cmd->len; #ifdef EMULATE_SYSCTL /* sysctl emulation. */ - if (cmd.type == DN_SYSCTL_GET) + if (cmd->type == DN_SYSCTL_GET) return kesysctl_emu_get(sopt); #endif - } else { + if (l > sizeof(r)) { + /* request larger than default, allocate buffer */ + cmd = malloc(l, M_DUMMYNET, M_WAIT); + if (cmd == NULL) + return ENOMEM; //XXX + error = sooptcopyin(sopt, cmd, l, l); + sopt->sopt_valsize = sopt_valsize; + if (error) + goto done; + } + } else { /* compatibility */ error = 0; - cmd.type = DN_CMD_GET; - cmd.len = sizeof(struct dn_id); - cmd.subtype = DN_GET_COMPAT; - // cmd.id = sopt_valsize; + cmd->type = DN_CMD_GET; + cmd->len = sizeof(struct dn_id); + cmd->subtype = DN_GET_COMPAT; + // cmd->id = sopt_valsize; D("compatibility mode"); } + a.extra = (struct copy_range *)cmd; + if (cmd->len == sizeof(*cmd)) { /* no range, create a default */ + uint32_t *rp = (uint32_t *)(cmd + 1); + cmd->len += 2* sizeof(uint32_t); + rp[0] = 1; + rp[1] = DN_MAX_ID - 1; + if (cmd->subtype == DN_LINK) { + rp[0] += DN_MAX_ID; + rp[1] += DN_MAX_ID; + } + } /* Count space (under lock) and allocate (outside lock). * Exit with lock held if we manage to get enough buffer. * Try a few times then give up. */ for (have = 0, i = 0; i < 10; i++) { DN_BH_WLOCK(); - need = compute_space(&cmd, &a.flags); + need = compute_space(cmd, &a); + + /* if there is a range, ignore value from compute_space() */ + if (l > sizeof(*cmd)) + need = sopt_valsize - sizeof(*cmd); + if (need < 0) { DN_BH_WUNLOCK(); - return EINVAL; + error = EINVAL; + goto done; } - need += sizeof(cmd); - cmd.id = need; + need += sizeof(*cmd); + cmd->id = need; if (have >= need) break; + DN_BH_WUNLOCK(); if (start) free(start, M_DUMMYNET); start = NULL; if (need > sopt_valsize) break; + have = need; start = malloc(have, M_DUMMYNET, M_WAITOK | M_ZERO); - if (start == NULL) - return ENOMEM; + if (start == NULL) { + error = ENOMEM; + goto done; + } } + if (start == NULL) { if (compat) { *compat = NULL; - return 1; // XXX + error = 1; // XXX + } else { + error = sooptcopyout(sopt, cmd, sizeof(*cmd)); } - return sooptcopyout(sopt, &cmd, sizeof(cmd)); + goto done; } ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, " "%d:%d si %d, %d:%d queues %d", @@ -1839,10 +1904,12 @@ dummynet_get(struct sockopt *sopt, void dn_cfg.si_count, sizeof(struct dn_flow), DN_SCH_I, dn_cfg.queue_count, sizeof(struct dn_queue), DN_QUEUE); sopt->sopt_valsize = sopt_valsize; - a.type = cmd.subtype; + a.type = cmd->subtype; + if (compat == NULL) { - bcopy(&cmd, start, sizeof(cmd)); - buf = start + sizeof(cmd); + bcopy(cmd, start, sizeof(*cmd)); + ((struct dn_id*)(start))->len = sizeof(struct dn_id); + buf = start + sizeof(*cmd); } else buf = start; a.start = &buf; @@ -1853,19 +1920,26 @@ dummynet_get(struct sockopt *sopt, void dn_ht_scan(dn_cfg.schedhash, copy_data_helper_compat, &a); a.type = DN_COMPAT_QUEUE; dn_ht_scan(dn_cfg.fshash, copy_data_helper_compat, &a); - } else if (a.type == DN_FS) + } else if (a.type == DN_FS) { dn_ht_scan(dn_cfg.fshash, copy_data_helper, &a); - else + } else { dn_ht_scan(dn_cfg.schedhash, copy_data_helper, &a); + } DN_BH_WUNLOCK(); + if (compat) { *compat = start; sopt->sopt_valsize = buf - start; /* free() is done by ip_dummynet_compat() */ + start = NULL; //XXX hack } else { error = sooptcopyout(sopt, start, buf - start); - free(start, M_DUMMYNET); } +done: + if (cmd && cmd != &r.o) + free(cmd, M_DUMMYNET); + if (start) + free(start, M_DUMMYNET); return error; } @@ -1945,8 +2019,7 @@ drain_queue_fs_cb(void *_fs, void *arg) dn_ht_scan_bucket(fs->qht, &fs->drain_bucket, drain_queue_cb, NULL); fs->drain_bucket++; - } - else { + } else { /* No hash table for this flowset, null the pointer * if the queue is deleted */