Date: Thu, 8 Aug 2013 04:47:46 GMT From: Alexander <Alexander.Klishin@billing.ru> To: freebsd-gnats-submit@FreeBSD.org Subject: misc/181131: sys/dev/netmap memory allocation improvement Message-ID: <201308080447.r784lkUc061198@oldred.freebsd.org> Resent-Message-ID: <201308080450.r784o0ix068280@freefall.freebsd.org>
next in thread | raw e-mail | index | archive | help
>Number: 181131 >Category: misc >Synopsis: sys/dev/netmap memory allocation improvement >Confidential: no >Severity: non-critical >Priority: low >Responsible: freebsd-bugs >State: open >Quarter: >Keywords: >Date-Required: >Class: update >Submitter-Id: current-users >Arrival-Date: Thu Aug 08 04:50:00 UTC 2013 >Closed-Date: >Last-Modified: >Originator: Alexander >Release: >Organization: www.billing.ru >Environment: >Description: Problem: netmap interface registration “ioctl(NIOCREGIF)” takes long time Improved memory allocator: * accelerated – allocate (or deallocate) needs only several memory access * simplified code Patch for netmap, FreeBSD 9, rev. 250458, is attached: netmap_mem2.c.patch.txt >How-To-Repeat: >Fix: Patch attached with submission follows: --- netmap/sys/dev/netmap/netmap_mem2.c 2013-08-07 14:42:42.000000000 +0400 +++ netmap.memalloc/sys/dev/netmap/netmap_mem2.c 2013-08-08 08:20:39.620209301 +0400 @@ -61,9 +61,9 @@ * per cluster). * * Objects are aligned to the cache line (64 bytes) rounding up object - * sizes when needed. A bitmap contains the state of each object. - * Allocation scans the bitmap; this is done only on attach, so we are not - * too worried about performance + * sizes when needed. Free objects are organised in list, netmap_obj_pool.free + * points to the first free object. Every object contains reference to the next + * free object in list. * * For each allocator we can define (thorugh sysctl) the size and * number of each object. Memory is allocated at the first use of a @@ -141,6 +141,9 @@ }, }; +typedef uint32_t objidx_t; +#define OBJIDX_END ((objidx_t)(~0U)) +#define OBJIDX_SIZE sizeof(objidx_t) struct netmap_obj_pool { char name[16]; /* name of the allocator */ @@ -161,8 +164,7 @@ u_int _memtotal; /* _numclusters*_clustsize */ struct lut_entry *lut; /* virt,phys addresses, objtotal entries */ - uint32_t *bitmap; /* one bit per buffer, 1 means free */ - uint32_t bitmap_slots; /* number of uint32 entries in bitmap */ + objidx_t free; /* head of free buffers linked list */ }; @@ -237,6 +239,7 @@ * First, find the allocator that contains the requested offset, * then locate the cluster through a lookup table. */ +#ifdef __FreeBSD__ static inline vm_paddr_t netmap_ofstophys(vm_offset_t offset) { @@ -261,34 +264,23 @@ + p[NETMAP_BUF_POOL]._memtotal); return 0; // XXX bad address } +#endif /* __FreeBSD__ */ + +/* objidx_t is stored at the end of memory block */ +#define netmap_objidx_ptr_vaddr(p, vaddr) \ + (objidx_t *)(vaddr + p->_objsize - OBJIDX_SIZE) +#define netmap_objidx_ptr(p, i) \ + (objidx_t *)(p->lut[i].vaddr + p->_objsize - OBJIDX_SIZE) /* * we store objects by kernel address, need to find the offset * within the pool to export the value to userspace. - * Algorithm: scan until we find the cluster, then add the - * actual offset in the cluster */ static ssize_t netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr) { - int i, k = p->clustentries, n = p->objtotal; - ssize_t ofs = 0; - - for (i = 0; i < n; i += k, ofs += p->_clustsize) { - const char *base = p->lut[i].vaddr; - ssize_t relofs = (const char *) vaddr - base; - - if (relofs < 0 || relofs >= p->_clustsize) - continue; - - ofs = ofs + relofs; - ND("%s: return offset %d (cluster %d) for pointer %p", - p->name, ofs, i, vaddr); - return ofs; - } - D("address %p is not contained inside any cluster (%s)", - vaddr, p->name); - return 0; /* An error occurred */ + objidx_t idx = *netmap_objidx_ptr_vaddr(p, vaddr); + return idx * p->_objsize; } /* Helper functions which convert virtual addresses to offsets */ @@ -306,15 +298,13 @@ /* - * report the index, and use start position as a hint, - * otherwise buffer allocation becomes terribly expensive. + * Allocate from pool. */ static void * -netmap_obj_malloc(struct netmap_obj_pool *p, int len, uint32_t *start, uint32_t *index) +netmap_obj_malloc(struct netmap_obj_pool *p, int len) { - uint32_t i = 0; /* index in the bitmap */ - uint32_t mask, j; /* slot counter */ void *vaddr = NULL; + objidx_t idx = 0; if (len > p->_objsize) { D("%s request size %d too large", p->name, len); @@ -326,47 +316,34 @@ D("%s allocator: run out of memory", p->name); return NULL; } - if (start) - i = *start; - - /* termination is guaranteed by p->free, but better check bounds on i */ - while (vaddr == NULL && i < p->bitmap_slots) { - uint32_t cur = p->bitmap[i]; - if (cur == 0) { /* bitmask is fully used */ - i++; - continue; - } - /* locate a slot */ - for (j = 0, mask = 1; (cur & mask) == 0; j++, mask <<= 1) - ; - - p->bitmap[i] &= ~mask; /* mark object as in use */ - p->objfree--; - vaddr = p->lut[i * 32 + j].vaddr; - if (index) - *index = i * 32 + j; - } - ND("%s allocator: allocated object @ [%d][%d]: vaddr %p", i, j, vaddr); - if (start) - *start = i; + vaddr = p->lut[p->free].vaddr; + idx = p->free; + p->free = *netmap_objidx_ptr(p, idx); /* next free buffer */ + *netmap_objidx_ptr(p, idx) = idx; /* now refers to the current buffer */ + p->objfree--; return vaddr; } /* - * free by index, not by address. This is slow, but is only used - * for a small number of objects (rings, nifp) + * free by index, not by address. */ static void -netmap_obj_free(struct netmap_obj_pool *p, uint32_t j) +netmap_obj_free(struct netmap_obj_pool *p, objidx_t j) { if (j >= p->objtotal) { D("invalid index %u, max %u", j, p->objtotal); return; } - p->bitmap[j / 32] |= (1 << (j % 32)); + if (*netmap_objidx_ptr(p, j) != j) { + D("%s allocator error: wrong index", p->name); + return; + } + /* return mem to free list */ + *netmap_objidx_ptr(p, j) = p->free; + p->free = j; p->objfree++; return; } @@ -374,37 +351,16 @@ static void netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr) { - int i, j, n = p->_memtotal / p->_clustsize; - - for (i = 0, j = 0; i < n; i++, j += p->clustentries) { - void *base = p->lut[i * p->clustentries].vaddr; - ssize_t relofs = (ssize_t) vaddr - (ssize_t) base; - - /* Given address, is out of the scope of the current cluster.*/ - if (vaddr < base || relofs >= p->_clustsize) - continue; - - j = j + relofs / p->_objsize; - KASSERT(j != 0, ("Cannot free object 0")); - netmap_obj_free(p, j); - return; - } - D("address %p is not contained inside any cluster (%s)", - vaddr, p->name); + objidx_t idx = *netmap_objidx_ptr_vaddr(p, vaddr); + netmap_obj_free(p, idx); } -#define netmap_if_malloc(len) netmap_obj_malloc(&nm_mem.pools[NETMAP_IF_POOL], len, NULL, NULL) +#define netmap_if_malloc(len) netmap_obj_malloc(&nm_mem.pools[NETMAP_IF_POOL], len) #define netmap_if_free(v) netmap_obj_free_va(&nm_mem.pools[NETMAP_IF_POOL], (v)) -#define netmap_ring_malloc(len) netmap_obj_malloc(&nm_mem.pools[NETMAP_RING_POOL], len, NULL, NULL) +#define netmap_ring_malloc(len) netmap_obj_malloc(&nm_mem.pools[NETMAP_RING_POOL], len) #define netmap_ring_free(v) netmap_obj_free_va(&nm_mem.pools[NETMAP_RING_POOL], (v)) -#define netmap_buf_malloc(_pos, _index) \ - netmap_obj_malloc(&nm_mem.pools[NETMAP_BUF_POOL], NETMAP_BUF_SIZE, _pos, _index) - - -/* Return the index associated to the given packet buffer */ -#define netmap_buf_index(v) \ - (netmap_obj_offset(&nm_mem.pools[NETMAP_BUF_POOL], (v)) / nm_mem.pools[NETMAP_BUF_POOL]._objsize) - +#define netmap_buf_malloc() \ + netmap_obj_malloc(&nm_mem.pools[NETMAP_BUF_POOL], NETMAP_BUF_SIZE) /* Return nonzero on error */ static int @@ -413,17 +369,15 @@ { struct netmap_obj_pool *p = &nm_mem.pools[NETMAP_BUF_POOL]; int i = 0; /* slot counter */ - uint32_t pos = 0; /* slot in p->bitmap */ - uint32_t index = 0; /* buffer index */ (void)nifp; /* UNUSED */ for (i = 0; i < n; i++) { - void *vaddr = netmap_buf_malloc(&pos, &index); + void *vaddr = netmap_buf_malloc(); if (vaddr == NULL) { D("unable to locate empty packet buffer"); goto cleanup; } - slot[i].buf_idx = index; + slot[i].buf_idx = *netmap_objidx_ptr_vaddr(p, vaddr); slot[i].len = p->_objsize; /* XXX setting flags=NS_BUF_CHANGED forces a pointer reload * in the NIC ring. This is a hack that hides missing @@ -432,7 +386,7 @@ // slot[i].flags = NS_BUF_CHANGED; } - ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos); + ND("allocated %d buffers, %d available", n, p->objfree); return (0); cleanup: @@ -462,9 +416,6 @@ { if (p == NULL) return; - if (p->bitmap) - free(p->bitmap, M_NETMAP); - p->bitmap = NULL; if (p->lut) { int i; for (i = 0; i < p->objtotal; i += p->clustentries) { @@ -604,16 +555,6 @@ goto clean; } - /* Allocate the bitmap */ - n = (p->objtotal + 31) / 32; - p->bitmap = malloc(sizeof(uint32_t) * n, M_NETMAP, M_NOWAIT | M_ZERO); - if (p->bitmap == NULL) { - D("Unable to create bitmap (%d entries) for allocator '%s'", n, - p->name); - goto clean; - } - p->bitmap_slots = n; - /* * Allocate clusters, init pointers and bitmap */ @@ -633,7 +574,6 @@ i, p->name); lim = i / 2; for (i--; i >= lim; i--) { - p->bitmap[ (i>>5) ] &= ~( 1 << (i & 31) ); if (i % p->clustentries == 0 && p->lut[i].vaddr) contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP); @@ -645,12 +585,13 @@ break; } for (; i < lim; i++, clust += p->_objsize) { - p->bitmap[ (i>>5) ] |= ( 1 << (i & 31) ); p->lut[i].vaddr = clust; p->lut[i].paddr = vtophys(clust); + *netmap_objidx_ptr(p, i) = i + 1; /* linked list*/ } } - p->bitmap[0] = ~3; /* objs 0 and 1 is always busy */ + *netmap_objidx_ptr(p, i-1) = OBJIDX_END; /* linked list end */ + p->free = 2; /* objs 0 and 1 are always busy */ if (netmap_verbose) D("Pre-allocated %d clusters (%d/%dKB) for '%s'", p->_numclusters, p->_clustsize >> 10, >Release-Note: >Audit-Trail: >Unformatted:
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201308080447.r784lkUc061198>