From owner-svn-src-all@FreeBSD.ORG Wed Mar 3 15:05:58 2010 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id EA153106564A; Wed, 3 Mar 2010 15:05:58 +0000 (UTC) (envelope-from gnn@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id D6E198FC29; Wed, 3 Mar 2010 15:05:58 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o23F5wbc084260; Wed, 3 Mar 2010 15:05:58 GMT (envelope-from gnn@svn.freebsd.org) Received: (from gnn@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o23F5wtc084253; Wed, 3 Mar 2010 15:05:58 GMT (envelope-from gnn@svn.freebsd.org) Message-Id: <201003031505.o23F5wtc084253@svn.freebsd.org> From: "George V. Neville-Neil" Date: Wed, 3 Mar 2010 15:05:58 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r204635 - in head: lib/libpmc sys/conf sys/dev/hwpmc sys/mips/include sys/sys X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 03 Mar 2010 15:05:59 -0000 Author: gnn Date: Wed Mar 3 15:05:58 2010 New Revision: 204635 URL: http://svn.freebsd.org/changeset/base/204635 Log: Add support for hwpmc(4) on the MIPS 24K, 32 bit, embedded processor. Add macros for properly accessing coprocessor 0 registers that support performance counters. 
Reviewed by: jkoshy rpaulo fabien imp MFC after: 1 month Added: head/lib/libpmc/pmc.mips.3 (contents, props changed) head/sys/dev/hwpmc/hwpmc_mips.c (contents, props changed) head/sys/dev/hwpmc/hwpmc_mips24k.c (contents, props changed) head/sys/dev/hwpmc/hwpmc_mips24k.h (contents, props changed) Modified: head/lib/libpmc/libpmc.c head/sys/conf/files.mips head/sys/dev/hwpmc/pmc_events.h head/sys/mips/include/cpufunc.h head/sys/mips/include/pmc_mdep.h head/sys/sys/pmc.h Modified: head/lib/libpmc/libpmc.c ============================================================================== --- head/lib/libpmc/libpmc.c Wed Mar 3 14:48:08 2010 (r204634) +++ head/lib/libpmc/libpmc.c Wed Mar 3 15:05:58 2010 (r204635) @@ -74,6 +74,12 @@ static int xscale_allocate_pmc(enum pmc_ struct pmc_op_pmcallocate *_pmc_config); #endif +#if defined(__mips__) +static int mips24k_allocate_pmc(enum pmc_event _pe, char* ctrspec, + struct pmc_op_pmcallocate *_pmc_config); +#endif /* __mips__ */ + + #define PMC_CALL(cmd, params) \ syscall(pmc_syscall, PMC_OP_##cmd, (params)) @@ -137,6 +143,7 @@ PMC_CLASSDEP_TABLE(p4, P4); PMC_CLASSDEP_TABLE(p5, P5); PMC_CLASSDEP_TABLE(p6, P6); PMC_CLASSDEP_TABLE(xscale, XSCALE); +PMC_CLASSDEP_TABLE(mips24k, MIPS24K); #undef __PMC_EV_ALIAS #define __PMC_EV_ALIAS(N,CODE) { N, PMC_EV_##CODE }, @@ -182,6 +189,7 @@ PMC_MDEP_TABLE(p4, P4, PMC_CLASS_TSC); PMC_MDEP_TABLE(p5, P5, PMC_CLASS_TSC); PMC_MDEP_TABLE(p6, P6, PMC_CLASS_TSC); PMC_MDEP_TABLE(xscale, XSCALE, PMC_CLASS_XSCALE); +PMC_MDEP_TABLE(mips24k, MIPS24K, PMC_CLASS_MIPS24K); static const struct pmc_event_descr tsc_event_table[] = { @@ -226,6 +234,10 @@ PMC_CLASS_TABLE_DESC(tsc, TSC, tsc, tsc) PMC_CLASS_TABLE_DESC(xscale, XSCALE, xscale, xscale); #endif +#if defined(__mips__) +PMC_CLASS_TABLE_DESC(mips24k, MIPS24K, mips24k, mips24k); +#endif /* __mips__ */ + #undef PMC_CLASS_TABLE_DESC static const struct pmc_class_descr **pmc_class_table; @@ -2040,6 +2052,45 @@ xscale_allocate_pmc(enum pmc_event pe, c } #endif 
+#if defined(__mips__) + +static struct pmc_event_alias mips24k_aliases[] = { + EV_ALIAS("instructions", "INSTR_EXECUTED"), + EV_ALIAS("branches", "BRANCH_COMPLETED"), + EV_ALIAS("branch-mispredicts", "BRANCH_MISPRED"), + EV_ALIAS(NULL, NULL) +}; + +#define MIPS24K_KW_OS "os" +#define MIPS24K_KW_USR "usr" +#define MIPS24K_KW_ANYTHREAD "anythread" + +static int +mips24k_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, + struct pmc_op_pmcallocate *pmc_config __unused) +{ + char *p; + + (void) pe; + + pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE); + + while ((p = strsep(&ctrspec, ",")) != NULL) { + if (KWMATCH(p, MIPS24K_KW_OS)) + pmc_config->pm_caps |= PMC_CAP_SYSTEM; + else if (KWMATCH(p, MIPS24K_KW_USR)) + pmc_config->pm_caps |= PMC_CAP_USER; + else if (KWMATCH(p, MIPS24K_KW_ANYTHREAD)) + pmc_config->pm_caps |= (PMC_CAP_USER | PMC_CAP_SYSTEM); + else + return (-1); + } + + return (0); +} +#endif /* __mips__ */ + + /* * Match an event name `name' with its canonical form. * @@ -2371,6 +2422,10 @@ pmc_event_names_of_class(enum pmc_class ev = xscale_event_table; count = PMC_EVENT_TABLE_SIZE(xscale); break; + case PMC_CLASS_MIPS24K: + ev = mips24k_event_table; + count = PMC_EVENT_TABLE_SIZE(mips24k); + break; default: errno = EINVAL; return (-1); @@ -2563,8 +2618,12 @@ pmc_init(void) pmc_class_table[n] = &xscale_class_table_descr; break; #endif - - +#if defined(__mips__) + case PMC_CPU_MIPS_24K: + PMC_MDEP_INIT(mips24k); + pmc_class_table[n] = &mips24k_class_table_descr; + break; +#endif /* __mips__ */ default: /* * Some kind of CPU this version of the library knows nothing @@ -2681,6 +2740,10 @@ _pmc_name_of_event(enum pmc_event pe, en } else if (pe >= PMC_EV_XSCALE_FIRST && pe <= PMC_EV_XSCALE_LAST) { ev = xscale_event_table; evfence = xscale_event_table + PMC_EVENT_TABLE_SIZE(xscale); + } else if (pe >= PMC_EV_MIPS24K_FIRST && pe <= PMC_EV_MIPS24K_LAST) { + ev = mips24k_event_table; + evfence = mips24k_event_table + PMC_EVENT_TABLE_SIZE(mips24k +); } 
else if (pe == PMC_EV_TSC_TSC) { ev = tsc_event_table; evfence = tsc_event_table + PMC_EVENT_TABLE_SIZE(tsc); Added: head/lib/libpmc/pmc.mips.3 ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/lib/libpmc/pmc.mips.3 Wed Mar 3 15:05:58 2010 (r204635) @@ -0,0 +1,410 @@ +.\" Copyright (c) 2010 George Neville-Neil. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" This software is provided by ``as is'' and +.\" any express or implied warranties, including, but not limited to, the +.\" implied warranties of merchantability and fitness for a particular purpose +.\" are disclaimed. in no event shall George Neville-Neil be liable +.\" for any direct, indirect, incidental, special, exemplary, or consequential +.\" damages (including, but not limited to, procurement of substitute goods +.\" or services; loss of use, data, or profits; or business interruption) +.\" however caused and on any theory of liability, whether in contract, strict +.\" liability, or tort (including negligence or otherwise) arising in any way +.\" out of the use of this software, even if advised of the possibility of +.\" such damage. 
+.\" +.\" $FreeBSD$ +.\" +.Dd February 11, 2010 +.Os +.Dt PMC.MIPS 3 +.Sh NAME +.Nm pmc.mips +.Nd measurement events for +.Tn MIPS +family CPUs +.Sh LIBRARY +.Lb libpmc +.Sh SYNOPSIS +.In pmc.h +.Sh DESCRIPTION +MIPS PMCs are present in MIPS +.Tn "24k" +and other processors in the MIPS family. +.Pp +There are two counters supported by the hardware and each is 32 bits +wide. +.Pp +MIPS PMCs are documented in +.Rs +.%B "MIPS32 24K Processor Core Family Software User's Manual" +.%D December 2008 +.%Q "MIPS Technologies Inc." +.Re +.Ss Event Specifiers (Programmable PMCs) +MIPS programmable PMCs support the following events: +.Bl -tag -width indent +.It Li CYCLE +.Pq Event 0, Counter 0/1 +Total number of cycles. +The performance counters are clocked by the +top-level gated clock. +If the core is built with that clock gater +present, none of the counters will increment while the clock is +stopped - due to a WAIT instruction. +.It Li INSTR_EXECUTED +.Pq Event 1, Counter 0/1 +Total number of instructions completed. +.It Li BRANCH_COMPLETED +.Pq Event 2, Counter 0 +Total number of branch instructions completed. +.It Li BRANCH_MISPRED +.Pq Event 2, Counter 1 +Counts all branch instructions which completed, but were mispredicted. +.It Li RETURN +.Pq Event 3, Counter 0 +Counts all JR R31 instructions completed. +.It Li RETURN_MISPRED +.Pq Event 3, Counter 1 +Counts all JR $31 instructions which completed, used the RPS for a prediction, but were mispredicted. +.It Li RETURN_NOT_31 +.Pq Event 4, Counter 0 +Counts all JR $xx (not $31) and JALR instructions (indirect jumps). +.It Li RETURN_NOTPRED +.Pq Event 4, Counter 1 +If RPS use is disabled, JR $31 will not be predicted. +.It Li ITLB_ACCESS +.Pq Event 5, Counter 0 +Counts ITLB accesses that are due to fetches showing up in the +instruction fetch stage of the pipeline and which do not use a fixed +mapping or are not in unmapped space. 
+If an address is fetched twice from the pipe (as in the case of a +cache miss), that instruction will count as 2 ITLB accesses. +Since each fetch gets us 2 instructions, there is one access marked per double +word. +.It Li ITLB_MISS +.Pq Event 5, Counter 1 +Counts all misses in the ITLB except ones that are on the back of another +miss. +We cannot process back to back misses and thus those are +ignored. +They are also ignored if there is some form of address error. +.It Li DTLB_ACCESS +.Pq Event 6, Counter 0 +Counts DTLB accesses including those in unmapped address spaces. +.It Li DTLB_MISS +.Pq Event 6, Counter 1 +Counts DTLB misses. Back to back misses that result in only one DTLB +entry getting refilled are counted as a single miss. +.It Li JTLB_IACCESS +.Pq Event 7, Counter 0 +Instruction JTLB accesses are counted exactly the same as ITLB misses. +.It Li JTLB_IMISS +.Pq Event 7, Counter 1 +Counts instruction JTLB accesses that result in no match or a match on +an invalid translation. +.It Li JTLB_DACCESS +.Pq Event 8, Counter 0 +Data JTLB accesses. +.It Li JTLB_DMISS +.Pq Event 8, Counter 1 +Counts data JTLB accesses that result in no match or a match on an invalid translation. +.It Li IC_FETCH +.Pq Event 9, Counter 0 +Counts every time the instruction cache is accessed. All replays, +wasted fetches etc. are counted. +For example, following a branch, even though the prediction is taken, +the fall through access is counted. + +.It Li IC_MISS +.Pq Event 9, Counter 1 +Counts all instruction cache misses that result in a bus request. +.It Li DC_LOADSTORE +.Pq Event 10, Counter 0 +Counts cached loads and stores. +.It Li DC_WRITEBACK +.Pq Event 10, Counter 1 +Counts cache lines written back to memory due to replacement or cacheops. +.It Li DC_MISS +.Pq Event 11, Counter 0/1 +Counts loads and stores that miss in the cache. +.It Li LOAD_MISS +.Pq Event 13, Counter 0 +Counts number of cacheable loads that miss in the cache.
+.It Li STORE_MISS +.Pq Event 13, Counter 1 +Counts number of cacheable stores that miss in the cache. +.It Li INTEGER_COMPLETED +.Pq Event 14, Counter 0 +Non-floating point, non-Coprocessor 2 instructions. +.It Li FP_COMPLETED +.Pq Event 14, Counter 1 +Floating point instructions completed. +.It Li LOAD_COMPLETED +.Pq Event 15, Counter 0 +Integer and co-processor loads completed. +.It Li STORE_COMPLETED +.Pq Event 15, Counter 1 +Integer and co-processor stores completed. +.It Li BARRIER_COMPLETED +.Pq Event 16, Counter 0 +Direct jump (and link) instructions completed. +.It Li MIPS16_COMPLETED +.Pq Event 16, Counter 1 +MIPS16c instructions completed. +.It Li NOP_COMPLETED +.Pq Event 17, Counter 0 +NOPs completed. +This includes all instructions that normally write to a general +purpose register, but where the destination register was set to r0. +.It Li INTEGER_MULDIV_COMPLETED +.Pq Event 17, Counter 1 +Integer multiply and divide instructions completed (MULxx, DIVx, MADDx, MSUBx). +.It Li RF_STALL +.Pq Event 18, Counter 0 +Counts the total number of cycles where no instructions are issued +from the IFU to ALU (the RF stage does not advance) which includes +both of the previous two events. +The RF_STALL is different than the sum of them though because cycles +when both stalls are active will only be counted once. +.It Li INSTR_REFETCH +.Pq Event 18, Counter 1 +Replay traps (other than uTLB). +.It Li STORE_COND_COMPLETED +.Pq Event 19, Counter 0 +Conditional stores completed. Counts all events, including failed stores. +.It Li STORE_COND_FAILED +.Pq Event 19, Counter 1 +Conditional store instruction that did not update memory. +Note: While this event and the SC instruction count event can be configured to +count in specific operating modes, the timing of the events is much +different and the observed operating mode could change between them, +causing some inaccuracy in the measured ratio.
+.It Li ICACHE_REQUESTS +.Pq Event 20, Counter 0 +Note that this only counts PREFs that are actually attempted. +PREFs to uncached addresses or ones with translation errors are not counted. +.It Li ICACHE_HIT +.Pq Event 20, Counter 1 +Counts PREF instructions that hit in the cache. +.It Li L2_WRITEBACK +.Pq Event 21, Counter 0 +Counts cache lines written back to memory due to replacement or cacheops. +.It Li L2_ACCESS +.Pq Event 21, Counter 1 +Number of accesses to L2 Cache. +.It Li L2_MISS +.Pq Event 22, Counter 0 +Number of accesses that missed in the L2 cache. +.It Li L2_ERR_CORRECTED +.Pq Event 22, Counter 1 +Single bit errors in L2 Cache that were detected and corrected. +.It Li EXCEPTIONS +.Pq Event 23, Counter 0 +Any type of exception taken. +.It Li RF_CYCLES_STALLED +.Pq Event 24, Counter 0 +Counts cycles where the LSU is in fixup and cannot accept a new +instruction from the ALU. +Fixups are replays within the LSU that occur when an instruction needs +to re-access the cache or the DTLB. +.It Li IFU_CYCLES_STALLED +.Pq Event 25, Counter 0 +Counts the number of cycles where the fetch unit is not providing a +valid instruction to the ALU. +.It Li ALU_CYCLES_STALLED +.Pq Event 25, Counter 1 +Counts the number of cycles where the ALU pipeline cannot advance. +.It Li UNCACHED_LOAD +.Pq Event 33, Counter 0 +Counts uncached and uncached accelerated loads. +.It Li UNCACHED_STORE +.Pq Event 33, Counter 1 +Counts uncached and uncached accelerated stores. +.It Li CP2_REG_TO_REG_COMPLETED +.Pq Event 35, Counter 0 +Co-processor 2 register to register instructions completed. +.It Li MFTC_COMPLETED +.Pq Event 35, Counter 1 +Co-processor 2 move to and from instructions as well as loads and stores. +.It Li IC_BLOCKED_CYCLES +.Pq Event 37, Counter 0 +Cycles when IFU stalls because an instruction miss caused the IFU not +to have any runnable instructions. +Ignores the stalls due to ITLB misses as well as the 4 cycles +following a redirect.
+.It Li DC_BLOCKED_CYCLES +.Pq Event 37, Counter 1 +Counts all cycles where integer pipeline waits on Load return data due +to a D-cache miss. +The LSU can signal a "long stall" on a D-cache miss, in which case +the waiting TC might be rescheduled so other TCs can execute +instructions till the data returns. +.It Li L2_IMISS_STALL_CYCLES +.Pq Event 38, Counter 0 +Cycles where the main pipeline is stalled waiting for a SYNC to complete. +.It Li L2_DMISS_STALL_CYCLES +.Pq Event 38, Counter 1 +Cycles where the main pipeline is stalled because of an index conflict +in the Fill Store Buffer. +.It Li DMISS_CYCLES +.Pq Event 39, Counter 0 +Data miss is outstanding, but not necessarily stalling the pipeline. +The difference between this and D$ miss stall cycles can show the gain +from non-blocking cache misses. +.It Li L2_MISS_CYCLES +.Pq Event 39, Counter 1 +L2 miss is outstanding, but not necessarily stalling the pipeline. +.It Li UNCACHED_BLOCK_CYCLES +.Pq Event 40, Counter 0 +Cycles where the processor is stalled on an uncached fetch, load, or store. +.It Li MDU_STALL_CYCLES +.Pq Event 41, Counter 0 +Counts all cycles where integer pipeline waits on MDU return data. +.It Li FPU_STALL_CYCLES +.Pq Event 41, Counter 1 +Counts all cycles where integer pipeline waits on FPU return data. +.It Li CP2_STALL_CYCLES +.Pq Event 42, Counter 0 +Counts all cycles where integer pipeline waits on CP2 return data. +.It Li COREXTEND_STALL_CYCLES +.Pq Event 42, Counter 1 +Counts all cycles where integer pipeline waits on CorExtend return data. +.It Li ISPRAM_STALL_CYCLES +.Pq Event 43, Counter 0 +Counts all pipeline bubbles that are a result of multicycle ISPRAM +access. +Pipeline bubbles are defined as all cycles that IFU doesn't present an +instruction to ALU. The four cycles after a redirect are not counted. +.It Li DSPRAM_STALL_CYCLES +.Pq Event 43, Counter 1 +Counts stall cycles created by an instruction waiting for access to DSPRAM.
+.It Li CACHE_STALL_CYCLES +.Pq Event 44, Counter 0 +Counts all cycles where the pipeline is stalled due to CACHE +instructions. +Includes cycles where CACHE instructions themselves are +stalled in the ALU, and cycles where CACHE instructions cause +subsequent instructions to be stalled. +.It Li LOAD_TO_USE_STALLS +.Pq Event 45, Counter 0 +Counts all cycles where integer pipeline waits on Load return data. +.It Li BASE_MISPRED_STALLS +.Pq Event 45, Counter 1 +Counts stall cycles due to skewed ALU where the bypass to the address +generation takes an extra cycle. +.It Li CPO_READ_STALLS +.Pq Event 46, Counter 0 +Counts all cycles where integer pipeline waits on return data from +MFC0, RDHWR instructions. +.It Li BRANCH_MISPRED_CYCLES +.Pq Event 46, Counter 1 +This counts the number of cycles from a mispredicted branch until the +next non-delay slot instruction executes. +.It Li IFETCH_BUFFER_FULL +.Pq Event 48, Counter 0 +Counts the number of times an instruction cache miss was detected, but +both fill buffers were already allocated. +.It Li FETCH_BUFFER_ALLOCATED +.Pq Event 48, Counter 1 +Number of cycles where at least one of the IFU fill buffers is +allocated (miss pending). +.It Li EJTAG_ITRIGGER +.Pq Event 49, Counter 0 +Number of times an EJTAG Instruction Trigger Point condition matched. +.It Li EJTAG_DTRIGGER +.Pq Event 49, Counter 1 +Number of times an EJTAG Data Trigger Point condition matched. +.It Li FSB_LT_QUARTER +.Pq Event 50, Counter 0 +Fill store buffer less than one quarter full. +.It Li FSB_QUARTER_TO_HALF +.Pq Event 50, Counter 1 +Fill store buffer between one quarter and one half full. +.It Li FSB_GT_HALF +.Pq Event 51, Counter 0 +Fill store buffer more than half full. +.It Li FSB_FULL_PIPELINE_STALLS +.Pq Event 51, Counter 1 +Cycles where the pipeline is stalled because the Fill-Store Buffer in LSU is full. +.It Li LDQ_LT_QUARTER +.Pq Event 52, Counter 0 +Load data queue less than one quarter full.
+.It Li LDQ_QUARTER_TO_HALF +.Pq Event 52, Counter 1 +Load data queue between one quarter and one half full. +.It Li LDQ_GT_HALF +.Pq Event 53, Counter 0 +Load data queue more than one half full. +.It Li LDQ_FULL_PIPELINE_STALLS +.Pq Event 53, Counter 1 +Cycles where the pipeline is stalled because the Load Data Queue in the LSU is full. +.It Li WBB_LT_QUARTER +.Pq Event 54, Counter 0 +Write back buffer less than one quarter full. +.It Li WBB_QUARTER_TO_HALF +.Pq Event 54, Counter 1 +Write back buffer between one quarter and one half full. +.It Li WBB_GT_HALF +.Pq Event 55, Counter 0 +Write back buffer more than one half full. +.It Li WBB_FULL_PIPELINE_STALLS +.Pq Event 55, Counter 1 +Cycles where the pipeline is stalled because the Write Back Buffer is full. +.It Li REQUEST_LATENCY +.Pq Event 61, Counter 0 +Measures latency from miss detection until critical dword of response +is returned. Only counts for cacheable reads. +.It Li REQUEST_COUNT +.Pq Event 61, Counter 1 +Counts number of cacheable read requests used for previous latency counter. +.El +.Ss Event Name Aliases +The following table shows the mapping between the PMC-independent +aliases supported by +.Lb libpmc +and the underlying hardware events used. +.Bl -column "branch-mispredicts" "cpu_clk_unhalted.core_p" +.It Em Alias Ta Em Event Ta +.It Li instructions Ta Li INSTR_EXECUTED Ta +.It Li branches Ta Li BRANCH_COMPLETED Ta +.It Li branch-mispredicts Ta Li BRANCH_MISPRED Ta +.El +.Sh SEE ALSO +.Xr pmc 3 , +.Xr pmc.atom 3 , +.Xr pmc.core 3 , +.Xr pmc.iaf 3 , +.Xr pmc.k7 3 , +.Xr pmc.k8 3 , +.Xr pmc.p4 3 , +.Xr pmc.p5 3 , +.Xr pmc.p6 3 , +.Xr pmc.tsc 3 , +.Xr pmc_cpuinfo 3 , +.Xr pmclog 3 , +.Xr hwpmc 4 +.Sh CAVEATS +The MIPS code does not yet support sampling. +.Sh HISTORY +The +.Nm pmc +library first appeared in +.Fx 6.0 . +.Sh AUTHORS +The +.Lb libpmc +library was written by +.An "Joseph Koshy" +.Aq jkoshy@FreeBSD.org .
+MIPS support was added by +.An "George Neville-Neil" +.Aq gnn@FreeBSD.org . Modified: head/sys/conf/files.mips ============================================================================== --- head/sys/conf/files.mips Wed Mar 3 14:48:08 2010 (r204634) +++ head/sys/conf/files.mips Wed Mar 3 15:05:58 2010 (r204635) @@ -101,3 +101,6 @@ dev/siba/siba_cc.c optional siba dev/siba/siba_core.c optional siba dev/siba/siba_pcib.c optional siba pci #mips/sentry5/siba_mips.c optional siba # not yet + +dev/hwpmc/hwpmc_mips.c optional hwpmc +dev/hwpmc/hwpmc_mips24k.c optional hwpmc Added: head/sys/dev/hwpmc/hwpmc_mips.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/dev/hwpmc/hwpmc_mips.c Wed Mar 3 15:05:58 2010 (r204635) @@ -0,0 +1,75 @@ +/*- + * Copyright (c) 2010, George V. Neville-Neil + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include +#include + +struct pmc_mdep * +pmc_md_initialize() +{ + /* if (cpu_class == CPU_CLASS_MIPS24K)*/ + return pmc_mips24k_initialize(); + /* else + return NULL;*/ +} + +void +pmc_md_finalize(struct pmc_mdep *md) +{ + /* if (cpu_class == CPU_CLASS_MIPS24K) */ + pmc_mips24k_finalize(md); + /* else + KASSERT(0, ("[mips,%d] Unknown CPU Class 0x%x", __LINE__, + cpu_class));*/ +} + +int +pmc_save_kernel_callchain(uintptr_t *cc, int maxsamples, + struct trapframe *tf) +{ + (void) cc; + (void) maxsamples; + (void) tf; + return (0); +} + +int +pmc_save_user_callchain(uintptr_t *cc, int maxsamples, + struct trapframe *tf) +{ + (void) cc; + (void) maxsamples; + (void) tf; + return (0); +} Added: head/sys/dev/hwpmc/hwpmc_mips24k.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/dev/hwpmc/hwpmc_mips24k.c Wed Mar 3 15:05:58 2010 (r204635) @@ -0,0 +1,570 @@ +/*- + * Copyright (c) 2010 George V. Neville-Neil + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Support for MIPS CPUs + * + */ +static int mips24k_npmcs; + +struct mips24k_event_code_map { + enum pmc_event pe_ev; /* enum value */ + uint8_t pe_counter; /* Which counter this can be counted in. */ + uint8_t pe_code; /* numeric code */ +}; + +/* + * MIPS event codes are encoded with a select bit. The + * select bit is used when writing to CP0 so that we + * can select either counter 0/2 or 1/3. The cycle + * and instruction counters are special in that they + * can be counted on either 0/2 or 1/3. + */ + +#define MIPS24K_ALL 255 /* Count events in any counter. 
*/ +#define MIPS24K_CTR_0 0 /* Counter 0 Event */ +#define MIPS24K_CTR_1 1 /* Counter 1 Event */ + +const struct mips24k_event_code_map mips24k_event_codes[] = { + { PMC_EV_MIPS24K_CYCLE, MIPS24K_ALL, 0}, + { PMC_EV_MIPS24K_INSTR_EXECUTED, MIPS24K_ALL, 1}, + { PMC_EV_MIPS24K_BRANCH_COMPLETED, MIPS24K_CTR_0, 2}, + { PMC_EV_MIPS24K_BRANCH_MISPRED, MIPS24K_CTR_1, 2}, + { PMC_EV_MIPS24K_RETURN, MIPS24K_CTR_0, 3}, + { PMC_EV_MIPS24K_RETURN_MISPRED, MIPS24K_CTR_1, 3}, + { PMC_EV_MIPS24K_RETURN_NOT_31, MIPS24K_CTR_0, 4}, + { PMC_EV_MIPS24K_RETURN_NOTPRED, MIPS24K_CTR_1, 4}, + { PMC_EV_MIPS24K_ITLB_ACCESS, MIPS24K_CTR_0, 5}, + { PMC_EV_MIPS24K_ITLB_MISS, MIPS24K_CTR_1, 5}, + { PMC_EV_MIPS24K_DTLB_ACCESS, MIPS24K_CTR_0, 6}, + { PMC_EV_MIPS24K_DTLB_MISS, MIPS24K_CTR_1, 6}, + { PMC_EV_MIPS24K_JTLB_IACCESS, MIPS24K_CTR_0, 7}, + { PMC_EV_MIPS24K_JTLB_IMISS, MIPS24K_CTR_1, 7}, + { PMC_EV_MIPS24K_JTLB_DACCESS, MIPS24K_CTR_0, 8}, + { PMC_EV_MIPS24K_JTLB_DMISS, MIPS24K_CTR_1, 8}, + { PMC_EV_MIPS24K_IC_FETCH, MIPS24K_CTR_0, 9}, + { PMC_EV_MIPS24K_IC_MISS, MIPS24K_CTR_1, 9}, + { PMC_EV_MIPS24K_DC_LOADSTORE, MIPS24K_CTR_0, 10}, + { PMC_EV_MIPS24K_DC_WRITEBACK, MIPS24K_CTR_1, 10}, + { PMC_EV_MIPS24K_DC_MISS, MIPS24K_ALL, 11}, + /* 12 reserved */ + { PMC_EV_MIPS24K_STORE_MISS, MIPS24K_CTR_0, 13}, + { PMC_EV_MIPS24K_LOAD_MISS, MIPS24K_CTR_1, 13}, + { PMC_EV_MIPS24K_INTEGER_COMPLETED, MIPS24K_CTR_0, 14}, + { PMC_EV_MIPS24K_FP_COMPLETED, MIPS24K_CTR_1, 14}, + { PMC_EV_MIPS24K_LOAD_COMPLETED, MIPS24K_CTR_0, 15}, + { PMC_EV_MIPS24K_STORE_COMPLETED, MIPS24K_CTR_1, 15}, + { PMC_EV_MIPS24K_BARRIER_COMPLETED, MIPS24K_CTR_0, 16}, + { PMC_EV_MIPS24K_MIPS16_COMPLETED, MIPS24K_CTR_1, 16}, + { PMC_EV_MIPS24K_NOP_COMPLETED, MIPS24K_CTR_0, 17}, + { PMC_EV_MIPS24K_INTEGER_MULDIV_COMPLETED, MIPS24K_CTR_1, 17}, + { PMC_EV_MIPS24K_RF_STALL, MIPS24K_CTR_0, 18}, + { PMC_EV_MIPS24K_INSTR_REFETCH, MIPS24K_CTR_1, 18}, + { PMC_EV_MIPS24K_STORE_COND_COMPLETED, MIPS24K_CTR_0, 19}, + { 
PMC_EV_MIPS24K_STORE_COND_FAILED, MIPS24K_CTR_1, 19}, + { PMC_EV_MIPS24K_ICACHE_REQUESTS, MIPS24K_CTR_0, 20}, + { PMC_EV_MIPS24K_ICACHE_HIT, MIPS24K_CTR_1, 20}, + { PMC_EV_MIPS24K_L2_WRITEBACK, MIPS24K_CTR_0, 21}, + { PMC_EV_MIPS24K_L2_ACCESS, MIPS24K_CTR_1, 21}, + { PMC_EV_MIPS24K_L2_MISS, MIPS24K_CTR_0, 22}, + { PMC_EV_MIPS24K_L2_ERR_CORRECTED, MIPS24K_CTR_1, 22}, + { PMC_EV_MIPS24K_EXCEPTIONS, MIPS24K_CTR_0, 23}, + /* Event 23 on COP0 1/3 is undefined */ + { PMC_EV_MIPS24K_RF_CYCLES_STALLED, MIPS24K_CTR_0, 24}, + { PMC_EV_MIPS24K_IFU_CYCLES_STALLED, MIPS24K_CTR_0, 25}, + { PMC_EV_MIPS24K_ALU_CYCLES_STALLED, MIPS24K_CTR_1, 25}, + /* Events 26 through 32 undefined or reserved to customers */ + { PMC_EV_MIPS24K_UNCACHED_LOAD, MIPS24K_CTR_0, 33}, + { PMC_EV_MIPS24K_UNCACHED_STORE, MIPS24K_CTR_1, 33}, + { PMC_EV_MIPS24K_CP2_REG_TO_REG_COMPLETED, MIPS24K_CTR_0, 35}, + { PMC_EV_MIPS24K_MFTC_COMPLETED, MIPS24K_CTR_1, 35}, + /* Event 36 reserved */ + { PMC_EV_MIPS24K_IC_BLOCKED_CYCLES, MIPS24K_CTR_0, 37}, + { PMC_EV_MIPS24K_DC_BLOCKED_CYCLES, MIPS24K_CTR_1, 37}, + { PMC_EV_MIPS24K_L2_IMISS_STALL_CYCLES, MIPS24K_CTR_0, 38}, + { PMC_EV_MIPS24K_L2_DMISS_STALL_CYCLES, MIPS24K_CTR_1, 38}, + { PMC_EV_MIPS24K_DMISS_CYCLES, MIPS24K_CTR_0, 39}, + { PMC_EV_MIPS24K_L2_MISS_CYCLES, MIPS24K_CTR_1, 39}, + { PMC_EV_MIPS24K_UNCACHED_BLOCK_CYCLES, MIPS24K_CTR_0, 40}, + { PMC_EV_MIPS24K_MDU_STALL_CYCLES, MIPS24K_CTR_0, 41}, + { PMC_EV_MIPS24K_FPU_STALL_CYCLES, MIPS24K_CTR_1, 41}, + { PMC_EV_MIPS24K_CP2_STALL_CYCLES, MIPS24K_CTR_0, 42}, + { PMC_EV_MIPS24K_COREXTEND_STALL_CYCLES, MIPS24K_CTR_1, 42}, + { PMC_EV_MIPS24K_ISPRAM_STALL_CYCLES, MIPS24K_CTR_0, 43}, + { PMC_EV_MIPS24K_DSPRAM_STALL_CYCLES, MIPS24K_CTR_1, 43}, + { PMC_EV_MIPS24K_CACHE_STALL_CYCLES, MIPS24K_CTR_0, 44}, + /* Event 44 undefined on 1/3 */ + { PMC_EV_MIPS24K_LOAD_TO_USE_STALLS, MIPS24K_CTR_0, 45}, + { PMC_EV_MIPS24K_BASE_MISPRED_STALLS, MIPS24K_CTR_1, 45}, + { PMC_EV_MIPS24K_CPO_READ_STALLS, MIPS24K_CTR_0, 46}, + { 
PMC_EV_MIPS24K_BRANCH_MISPRED_CYCLES, MIPS24K_CTR_1, 46}, + /* Event 47 reserved */ + { PMC_EV_MIPS24K_IFETCH_BUFFER_FULL, MIPS24K_CTR_0, 48}, + { PMC_EV_MIPS24K_FETCH_BUFFER_ALLOCATED, MIPS24K_CTR_1, 48}, + { PMC_EV_MIPS24K_EJTAG_ITRIGGER, MIPS24K_CTR_0, 49}, + { PMC_EV_MIPS24K_EJTAG_DTRIGGER, MIPS24K_CTR_1, 49}, + { PMC_EV_MIPS24K_FSB_LT_QUARTER, MIPS24K_CTR_0, 50}, + { PMC_EV_MIPS24K_FSB_QUARTER_TO_HALF, MIPS24K_CTR_1, 50}, + { PMC_EV_MIPS24K_FSB_GT_HALF, MIPS24K_CTR_0, 51}, + { PMC_EV_MIPS24K_FSB_FULL_PIPELINE_STALLS, MIPS24K_CTR_1, 51}, + { PMC_EV_MIPS24K_LDQ_LT_QUARTER, MIPS24K_CTR_0, 52}, + { PMC_EV_MIPS24K_LDQ_QUARTER_TO_HALF, MIPS24K_CTR_1, 52}, + { PMC_EV_MIPS24K_LDQ_GT_HALF, MIPS24K_CTR_0, 53}, + { PMC_EV_MIPS24K_LDQ_FULL_PIPELINE_STALLS, MIPS24K_CTR_1, 53}, + { PMC_EV_MIPS24K_WBB_LT_QUARTER, MIPS24K_CTR_0, 54}, + { PMC_EV_MIPS24K_WBB_QUARTER_TO_HALF, MIPS24K_CTR_1, 54}, + { PMC_EV_MIPS24K_WBB_GT_HALF, MIPS24K_CTR_0, 55}, + { PMC_EV_MIPS24K_WBB_FULL_PIPELINE_STALLS, MIPS24K_CTR_1, 55}, + /* Events 56-63 reserved */ + { PMC_EV_MIPS24K_REQUEST_LATENCY, MIPS24K_CTR_0, 61}, + { PMC_EV_MIPS24K_REQUEST_COUNT, MIPS24K_CTR_1, 61} + +}; + +const int mips24k_event_codes_size = + sizeof(mips24k_event_codes) / sizeof(mips24k_event_codes[0]); + +/* + * Per-processor information. + */ +struct mips24k_cpu { + struct pmc_hw *pc_mipspmcs; +}; + +static struct mips24k_cpu **mips24k_pcpu; + +/* + * Performance Count Register N + */ +static uint32_t +mips24k_pmcn_read(unsigned int pmc) +{ + uint32_t reg = 0; + + KASSERT(pmc < mips24k_npmcs, ("[mips,%d] illegal PMC number %d", + __LINE__, pmc)); + + /* The counter value is the next value after the control register. 
*/ + switch (pmc) { + case 0: + reg = mips_rd_perfcnt1(); + break; + case 1: + reg = mips_rd_perfcnt3(); + break; + default: + return 0; + } + return (reg); +} + +static uint32_t +mips24k_pmcn_write(unsigned int pmc, uint32_t reg) +{ + + KASSERT(pmc < mips24k_npmcs, ("[mips,%d] illegal PMC number %d", + __LINE__, pmc)); + + switch (pmc) { + case 0: + mips_wr_perfcnt1(reg); + break; + case 1: + mips_wr_perfcnt3(reg); + break; + default: + return 0; + } + return (reg); +} + +static int +mips24k_allocate_pmc(int cpu, int ri, struct pmc *pm, + const struct pmc_op_pmcallocate *a) +{ + enum pmc_event pe; + uint32_t caps, config, counter; + int i; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[mips,%d] illegal CPU value %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < mips24k_npmcs, + ("[mips,%d] illegal row index %d", __LINE__, ri)); + + caps = a->pm_caps; + if (a->pm_class != PMC_CLASS_MIPS24K) + return (EINVAL); + pe = a->pm_ev; + for (i = 0; i < mips24k_event_codes_size; i++) { + if (mips24k_event_codes[i].pe_ev == pe) { + config = mips24k_event_codes[i].pe_code; + counter = mips24k_event_codes[i].pe_counter; + break; + } + } + if (i == mips24k_event_codes_size) + return (EINVAL); + + if ((counter != MIPS24K_ALL) && (counter != ri)) + return (EINVAL); + + config <<= MIPS24K_PMC_SELECT; + + if (caps & PMC_CAP_SYSTEM) + config |= (MIPS24K_PMC_SUPER_ENABLE | + MIPS24K_PMC_KERNEL_ENABLE); + if (caps & PMC_CAP_USER) + config |= MIPS24K_PMC_USER_ENABLE; + if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0) + config |= MIPS24K_PMC_ENABLE; + + pm->pm_md.pm_mips24k.pm_mips24k_evsel = config; + + PMCDBG(MDP,ALL,2,"mips-allocate ri=%d -> config=0x%x", ri, config); + + return 0; +} + + +static int +mips24k_read_pmc(int cpu, int ri, pmc_value_t *v) +{ + struct pmc *pm; + pmc_value_t tmp; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[mips,%d] illegal CPU value %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < mips24k_npmcs, + ("[mips,%d] illegal row index %d", __LINE__, ri)); + 
+ pm = mips24k_pcpu[cpu]->pc_mipspmcs[ri].phw_pmc; + tmp = mips24k_pmcn_read(ri); + PMCDBG(MDP,REA,2,"mips-read id=%d -> %jd", ri, tmp); + if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) + *v = MIPS24K_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp); + else + *v = tmp; + + return 0; +} + +static int +mips24k_write_pmc(int cpu, int ri, pmc_value_t v) +{ + struct pmc *pm; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[mips,%d] illegal CPU value %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < mips24k_npmcs, + ("[mips,%d] illegal row-index %d", __LINE__, ri)); + + pm = mips24k_pcpu[cpu]->pc_mipspmcs[ri].phw_pmc; + + if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) + v = MIPS24K_RELOAD_COUNT_TO_PERFCTR_VALUE(v); + + PMCDBG(MDP,WRI,1,"mips-write cpu=%d ri=%d v=%jx", cpu, ri, v); + + mips24k_pmcn_write(ri, v); + + return 0; +} + +static int +mips24k_config_pmc(int cpu, int ri, struct pmc *pm) +{ + struct pmc_hw *phw; + + PMCDBG(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm); + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[mips,%d] illegal CPU value %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < mips24k_npmcs, + ("[mips,%d] illegal row-index %d", __LINE__, ri)); + + phw = &mips24k_pcpu[cpu]->pc_mipspmcs[ri]; + + KASSERT(pm == NULL || phw->phw_pmc == NULL, + ("[mips,%d] pm=%p phw->pm=%p hwpmc not unconfigured", + __LINE__, pm, phw->phw_pmc)); + + phw->phw_pmc = pm; + + return 0; +} + +static int +mips24k_start_pmc(int cpu, int ri) +{ + uint32_t config; + struct pmc *pm; + struct pmc_hw *phw; + + phw = &mips24k_pcpu[cpu]->pc_mipspmcs[ri]; + pm = phw->phw_pmc; + config = pm->pm_md.pm_mips24k.pm_mips24k_evsel; + + /* Enable the PMC. */ + switch (ri) { + case 0: + mips_wr_perfcnt0(config); + break; + case 1: + mips_wr_perfcnt2(config); + break; + default: + break; + } + + return 0; +} + +static int +mips24k_stop_pmc(int cpu, int ri) +{ + struct pmc *pm; + struct pmc_hw *phw; + + phw = &mips24k_pcpu[cpu]->pc_mipspmcs[ri]; + pm = phw->phw_pmc; + + /* + * Disable the PMCs. 
+ * + * Clearing the entire register turns the counter off as well + * as removes the previously sampled event. *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***