From owner-freebsd-current@FreeBSD.ORG Thu Mar 17 09:14:12 2005
To: current@freebsd.org
From: Poul-Henning Kamp
Date: Thu, 17 Mar 2005 10:14:07 +0100
Message-ID: <5641.1111050847@critter.freebsd.dk>
Sender: phk@critter.freebsd.dk
Subject: [TEST/REVIEW/PERF] kern_thread.c tid allocation patch

While investigating a Coverity report (a false alarm, btw) I noticed
that kern_thread.c carries a fair amount of code just for allocating
thread IDs.  This patch uses the subr_unit allocator instead.

I would expect thread create/destroy performance to improve on systems
with many threads, and I would be interested to hear if anybody can
measure a difference.  (For anyone unfamiliar with the unr calls the
patch switches to, an illustrative sketch of the calling pattern is
appended after the patch.)

Poul-Henning

Index: kern/kern_thread.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_thread.c,v
retrieving revision 1.211
diff -u -r1.211 kern_thread.c
--- kern/kern_thread.c	5 Mar 2005 09:15:03 -0000	1.211
+++ kern/kern_thread.c	17 Mar 2005 09:07:45 -0000
@@ -103,31 +103,8 @@
 	0, sizeof(virtual_cpu), sysctl_kse_virtual_cpu, "I",
 	"debug virtual cpus");
 
-/*
- * Thread ID allocator. The allocator keeps track of assigned IDs by
- * using a bitmap. The bitmap is created in parts. The parts are linked
- * together.
- */
-typedef u_long tid_bitmap_word;
-
-#define TID_IDS_PER_PART	1024
-#define TID_IDS_PER_IDX		(sizeof(tid_bitmap_word) << 3)
-#define TID_BITMAP_SIZE		(TID_IDS_PER_PART / TID_IDS_PER_IDX)
-#define TID_MIN			(PID_MAX + 1)
-
-struct tid_bitmap_part {
-	STAILQ_ENTRY(tid_bitmap_part) bmp_next;
-	tid_bitmap_word	bmp_bitmap[TID_BITMAP_SIZE];
-	lwpid_t		bmp_base;
-	int		bmp_free;
-};
-
-static STAILQ_HEAD(, tid_bitmap_part) tid_bitmap =
-    STAILQ_HEAD_INITIALIZER(tid_bitmap);
-static uma_zone_t tid_zone;
-
 struct mtx tid_lock;
-MTX_SYSINIT(tid_lock, &tid_lock, "TID lock", MTX_DEF);
+static struct unrhdr *tid_unrhdr;
 
 /*
  * Prepare a thread for use.
@@ -197,45 +174,10 @@
 thread_init(void *mem, int size, int flags)
 {
 	struct thread *td;
-	struct tid_bitmap_part *bmp, *new;
-	int bit, idx;
 
 	td = (struct thread *)mem;
-	mtx_lock(&tid_lock);
-	STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
-		if (bmp->bmp_free)
-			break;
-	}
-	/* Create a new bitmap if we run out of free bits. */
-	if (bmp == NULL) {
-		mtx_unlock(&tid_lock);
-		new = uma_zalloc(tid_zone, M_WAITOK);
-		mtx_lock(&tid_lock);
-		bmp = STAILQ_LAST(&tid_bitmap, tid_bitmap_part, bmp_next);
-		if (bmp == NULL || bmp->bmp_free < TID_IDS_PER_PART/2) {
-			/* 1=free, 0=assigned. This way we can use ffsl(). */
-			memset(new->bmp_bitmap, ~0U, sizeof(new->bmp_bitmap));
-			new->bmp_base = (bmp == NULL) ? TID_MIN :
-			    bmp->bmp_base + TID_IDS_PER_PART;
-			new->bmp_free = TID_IDS_PER_PART;
-			STAILQ_INSERT_TAIL(&tid_bitmap, new, bmp_next);
-			bmp = new;
-			new = NULL;
-		}
-	} else
-		new = NULL;
-	/* We have a bitmap with available IDs. */
-	idx = 0;
-	while (idx < TID_BITMAP_SIZE && bmp->bmp_bitmap[idx] == 0UL)
-		idx++;
-	bit = ffsl(bmp->bmp_bitmap[idx]) - 1;
-	td->td_tid = bmp->bmp_base + idx * TID_IDS_PER_IDX + bit;
-	bmp->bmp_bitmap[idx] &= ~(1UL << bit);
-	bmp->bmp_free--;
-	mtx_unlock(&tid_lock);
-	if (new != NULL)
-		uma_zfree(tid_zone, new);
+	td->td_tid = alloc_unr(tid_unrhdr);
 
 	vm_thread_new(td, 0);
 	cpu_thread_setup(td);
@@ -254,9 +196,6 @@
 thread_fini(void *mem, int size)
 {
 	struct thread *td;
-	struct tid_bitmap_part *bmp;
-	lwpid_t tid;
-	int bit, idx;
 
 	td = (struct thread *)mem;
 	turnstile_free(td->td_turnstile);
@@ -264,19 +203,7 @@
 	umtxq_free(td->td_umtxq);
 	vm_thread_dispose(td);
 
-	STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
-		if (td->td_tid >= bmp->bmp_base &&
-		    td->td_tid < bmp->bmp_base + TID_IDS_PER_PART)
-			break;
-	}
-	KASSERT(bmp != NULL, ("No TID bitmap?"));
-	mtx_lock(&tid_lock);
-	tid = td->td_tid - bmp->bmp_base;
-	idx = tid / TID_IDS_PER_IDX;
-	bit = 1UL << (tid % TID_IDS_PER_IDX);
-	bmp->bmp_bitmap[idx] |= bit;
-	bmp->bmp_free++;
-	mtx_unlock(&tid_lock);
+	free_unr(tid_unrhdr, td->td_tid);
 }
 
 /*
@@ -362,11 +289,12 @@
 threadinit(void)
 {
 
+	mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);
+	tid_unrhdr = new_unrhdr(0, INT_MAX, &tid_lock);
+
 	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
 	    thread_ctor, thread_dtor, thread_init, thread_fini,
 	    UMA_ALIGN_CACHE, 0);
-	tid_zone = uma_zcreate("TID", sizeof(struct tid_bitmap_part),
-	    NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);
 	ksegrp_zone = uma_zcreate("KSEGRP", sched_sizeof_ksegrp(),
 	    ksegrp_ctor, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0);

-- 
Poul-Henning Kamp       | UNIX since Zilog Zeus 3.20
phk@FreeBSD.ORG         | TCP/IP since RFC 956
FreeBSD committer       | BSD since 4.3-tahoe
Never attribute to malice what can adequately be explained by incompetence.
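
For readers who have not used the subr_unit facility before, below is a
minimal sketch of the calling pattern the patch switches kern_thread.c
to: create a unit-number header backed by a mutex, then hand numbers
out and back with alloc_unr()/free_unr().  This is not part of the
patch; the example_* names, the lock name and the range are invented
for illustration, and the init/alloc/free calls simply mirror what the
diff does in threadinit(), thread_init() and thread_fini().

/*
 * Illustrative sketch only -- not part of the patch above.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>

static struct mtx	example_lock;
static struct unrhdr	*example_unrhdr;

static void
example_init(void)
{

	/* The mutex handed to new_unrhdr() serializes alloc/free calls. */
	mtx_init(&example_lock, "example unr lock", NULL, MTX_DEF);
	example_unrhdr = new_unrhdr(0, INT_MAX, &example_lock);
}

static int
example_alloc_id(void)
{

	/* Hands out the lowest free number in [0, INT_MAX]. */
	return (alloc_unr(example_unrhdr));
}

static void
example_free_id(int id)
{

	/* Returns the number so a later alloc_unr() can reuse it. */
	free_unr(example_unrhdr, id);
}

The bitmap bookkeeping the old code did by hand (finding a free bit,
clearing it on allocation, setting it again on free) is handled inside
subr_unit.c, which is why thread_init() and thread_fini() shrink to a
single call each in the patch.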