From owner-svn-src-stable-9@FreeBSD.ORG Mon Jun 4 07:16:13 2012 Return-Path: Delivered-To: svn-src-stable-9@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 7D02F106566B; Mon, 4 Jun 2012 07:16:13 +0000 (UTC) (envelope-from mav@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 671DB8FC08; Mon, 4 Jun 2012 07:16:13 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q547GDcY012057; Mon, 4 Jun 2012 07:16:13 GMT (envelope-from mav@svn.freebsd.org) Received: (from mav@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q547GDLq012055; Mon, 4 Jun 2012 07:16:13 GMT (envelope-from mav@svn.freebsd.org) Message-Id: <201206040716.q547GDLq012055@svn.freebsd.org> From: Alexander Motin Date: Mon, 4 Jun 2012 07:16:13 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org X-SVN-Group: stable-9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r236547 - stable/9/sys/kern X-BeenThere: svn-src-stable-9@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 9-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 04 Jun 2012 07:16:13 -0000 Author: mav Date: Mon Jun 4 07:16:12 2012 New Revision: 236547 URL: http://svn.freebsd.org/changeset/base/236547 Log: MFC r234066: Microoptimize cpu_search(). According to profiling, it makes one take 6% of CPU time on hackbench with its million of context switches per second, instead of 8% before. Modified: stable/9/sys/kern/sched_ule.c Directory Properties: stable/9/sys/ (props changed) Modified: stable/9/sys/kern/sched_ule.c ============================================================================== --- stable/9/sys/kern/sched_ule.c Mon Jun 4 07:12:36 2012 (r236546) +++ stable/9/sys/kern/sched_ule.c Mon Jun 4 07:16:12 2012 (r236547) @@ -615,32 +615,34 @@ cpu_search(const struct cpu_group *cg, s cpuset_t cpumask; struct cpu_group *child; struct tdq *tdq; - int cpu, i, hload, lload, load, total, rnd; + int cpu, i, hload, lload, load, total, rnd, *rndptr; total = 0; cpumask = cg->cg_mask; if (match & CPU_SEARCH_LOWEST) { lload = INT_MAX; - low->cs_load = INT_MAX; lgroup = *low; } if (match & CPU_SEARCH_HIGHEST) { - hload = -1; - high->cs_load = -1; + hload = INT_MIN; hgroup = *high; } /* Iterate through the child CPU groups and then remaining CPUs. */ - for (i = 0, cpu = 0; i <= cg->cg_children; ) { - if (i >= cg->cg_children) { - while (cpu <= mp_maxid && !CPU_ISSET(cpu, &cpumask)) - cpu++; - if (cpu > mp_maxid) + for (i = cg->cg_children, cpu = mp_maxid; i >= 0; ) { + if (i == 0) { + while (cpu >= 0 && !CPU_ISSET(cpu, &cpumask)) + cpu--; + if (cpu < 0) break; child = NULL; } else - child = &cg->cg_child[i]; + child = &cg->cg_child[i - 1]; + if (match & CPU_SEARCH_LOWEST) + lgroup.cs_cpu = -1; + if (match & CPU_SEARCH_HIGHEST) + hgroup.cs_cpu = -1; if (child) { /* Handle child CPU group. */ CPU_NAND(&cpumask, &child->cg_mask); switch (match) { @@ -657,23 +659,23 @@ cpu_search(const struct cpu_group *cg, s } else { /* Handle child CPU. */ tdq = TDQ_CPU(cpu); load = tdq->tdq_load * 256; - rnd = DPCPU_SET(randomval, - DPCPU_GET(randomval) * 69069 + 5) >> 26; + rndptr = DPCPU_PTR(randomval); + rnd = (*rndptr = *rndptr * 69069 + 5) >> 26; if (match & CPU_SEARCH_LOWEST) { if (cpu == low->cs_prefer) load -= 64; /* If that CPU is allowed and get data. */ - if (CPU_ISSET(cpu, &lgroup.cs_mask) && - tdq->tdq_lowpri > lgroup.cs_pri && - tdq->tdq_load <= lgroup.cs_limit) { + if (tdq->tdq_lowpri > lgroup.cs_pri && + tdq->tdq_load <= lgroup.cs_limit && + CPU_ISSET(cpu, &lgroup.cs_mask)) { lgroup.cs_cpu = cpu; lgroup.cs_load = load - rnd; } } if (match & CPU_SEARCH_HIGHEST) - if (CPU_ISSET(cpu, &hgroup.cs_mask) && - tdq->tdq_load >= hgroup.cs_limit && - tdq->tdq_transferable) { + if (tdq->tdq_load >= hgroup.cs_limit && + tdq->tdq_transferable && + CPU_ISSET(cpu, &hgroup.cs_mask)) { hgroup.cs_cpu = cpu; hgroup.cs_load = load - rnd; } @@ -682,7 +684,7 @@ cpu_search(const struct cpu_group *cg, s /* We have info about child item. Compare it. */ if (match & CPU_SEARCH_LOWEST) { - if (lgroup.cs_load != INT_MAX && + if (lgroup.cs_cpu >= 0 && (load < lload || (load == lload && lgroup.cs_load < low->cs_load))) { lload = load; @@ -691,17 +693,19 @@ cpu_search(const struct cpu_group *cg, s } } if (match & CPU_SEARCH_HIGHEST) - if (hgroup.cs_load >= 0 && + if (hgroup.cs_cpu >= 0 && (load > hload || (load == hload && hgroup.cs_load > high->cs_load))) { hload = load; high->cs_cpu = hgroup.cs_cpu; high->cs_load = hgroup.cs_load; } - if (child) - i++; - else - cpu++; + if (child) { + i--; + if (i == 0 && CPU_EMPTY(&cpumask)) + break; + } else + cpu--; } return (total); }