Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 8 Apr 2012 21:29:48 +0000 (UTC)
From:      Davide Italiano <davide@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org
Subject:   svn commit: r234046 - in stable/9: lib/libpmc sys/dev/hwpmc sys/sys
Message-ID:  <201204082129.q38LTmvH029157@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: davide
Date: Sun Apr  8 21:29:48 2012
New Revision: 234046
URL: http://svn.freebsd.org/changeset/base/234046

Log:
  MFC: r232366
  
  - Add support for the Intel Sandy Bridge microarchitecture (both core and
    uncore counting events)
  - New manpages with event lists.
  - Add MSRs for the Intel Sandy Bridge microarchitecture
  
  Approved by:	gnn (mentor)

Added:
  stable/9/lib/libpmc/pmc.sandybridge.3
     - copied unchanged from r232366, head/lib/libpmc/pmc.sandybridge.3
  stable/9/lib/libpmc/pmc.sandybridgeuc.3
     - copied unchanged from r232366, head/lib/libpmc/pmc.sandybridgeuc.3
Modified:
  stable/9/lib/libpmc/Makefile
  stable/9/lib/libpmc/libpmc.c
  stable/9/sys/dev/hwpmc/hwpmc_core.c
  stable/9/sys/dev/hwpmc/hwpmc_intel.c
  stable/9/sys/dev/hwpmc/hwpmc_uncore.c
  stable/9/sys/dev/hwpmc/hwpmc_uncore.h
  stable/9/sys/dev/hwpmc/pmc_events.h
  stable/9/sys/sys/pmc.h
Directory Properties:
  stable/9/lib/libpmc/   (props changed)
  stable/9/sys/   (props changed)

Modified: stable/9/lib/libpmc/Makefile
==============================================================================
--- stable/9/lib/libpmc/Makefile	Sun Apr  8 20:56:47 2012	(r234045)
+++ stable/9/lib/libpmc/Makefile	Sun Apr  8 21:29:48 2012	(r234046)
@@ -35,6 +35,8 @@ MAN+=	pmc.p5.3
 MAN+=	pmc.p6.3
 MAN+=	pmc.corei7.3
 MAN+=	pmc.corei7uc.3
+MAN+=	pmc.sandybridge.3
+MAN+=	pmc.sandybridgeuc.3	
 MAN+=	pmc.westmere.3
 MAN+=	pmc.westmereuc.3
 MAN+=	pmc.tsc.3

Modified: stable/9/lib/libpmc/libpmc.c
==============================================================================
--- stable/9/lib/libpmc/libpmc.c	Sun Apr  8 20:56:47 2012	(r234045)
+++ stable/9/lib/libpmc/libpmc.c	Sun Apr  8 21:29:48 2012	(r234046)
@@ -179,6 +179,11 @@ static const struct pmc_event_descr core
 	__PMC_EV_ALIAS_COREI7()
 };
 
+static const struct pmc_event_descr sandybridge_event_table[] = 
+{
+	__PMC_EV_ALIAS_SANDYBRIDGE()
+};
+
 static const struct pmc_event_descr westmere_event_table[] =
 {
 	__PMC_EV_ALIAS_WESTMERE()
@@ -189,6 +194,11 @@ static const struct pmc_event_descr core
 	__PMC_EV_ALIAS_COREI7UC()
 };
 
+static const struct pmc_event_descr sandybridgeuc_event_table[] =
+{
+	__PMC_EV_ALIAS_SANDYBRIDGEUC()
+};
+
 static const struct pmc_event_descr westmereuc_event_table[] =
 {
 	__PMC_EV_ALIAS_WESTMEREUC()
@@ -208,6 +218,7 @@ PMC_MDEP_TABLE(atom, IAP, PMC_CLASS_IAF,
 PMC_MDEP_TABLE(core, IAP, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(core2, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(corei7, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
+PMC_MDEP_TABLE(sandybridge, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
 PMC_MDEP_TABLE(westmere, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
 PMC_MDEP_TABLE(k7, K7, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(k8, K8, PMC_CLASS_TSC);
@@ -242,9 +253,11 @@ PMC_CLASS_TABLE_DESC(atom, IAP, atom, ia
 PMC_CLASS_TABLE_DESC(core, IAP, core, iap);
 PMC_CLASS_TABLE_DESC(core2, IAP, core2, iap);
 PMC_CLASS_TABLE_DESC(corei7, IAP, corei7, iap);
+PMC_CLASS_TABLE_DESC(sandybridge, IAP, sandybridge, iap);
 PMC_CLASS_TABLE_DESC(westmere, IAP, westmere, iap);
 PMC_CLASS_TABLE_DESC(ucf, UCF, ucf, ucf);
 PMC_CLASS_TABLE_DESC(corei7uc, UCP, corei7uc, ucp);
+PMC_CLASS_TABLE_DESC(sandybridgeuc, UCP, sandybridgeuc, ucp);
 PMC_CLASS_TABLE_DESC(westmereuc, UCP, westmereuc, ucp);
 #endif
 #if	defined(__i386__)
@@ -530,6 +543,8 @@ static struct pmc_event_alias core2_alia
 #define	atom_aliases_without_iaf	core2_aliases_without_iaf
 #define corei7_aliases			core2_aliases
 #define corei7_aliases_without_iaf	core2_aliases_without_iaf
+#define sandybridge_aliases		core2_aliases
+#define sandybridge_aliases_without_iaf	core2_aliases_without_iaf
 #define westmere_aliases		core2_aliases
 #define westmere_aliases_without_iaf	core2_aliases_without_iaf
 
@@ -2562,6 +2577,10 @@ pmc_event_names_of_class(enum pmc_class 
 			ev = corei7_event_table;
 			count = PMC_EVENT_TABLE_SIZE(corei7);
 			break;
+		case PMC_CPU_INTEL_SANDYBRIDGE:
+			ev = sandybridge_event_table;
+			count = PMC_EVENT_TABLE_SIZE(sandybridge);
+			break;
 		case PMC_CPU_INTEL_WESTMERE:
 			ev = westmere_event_table;
 			count = PMC_EVENT_TABLE_SIZE(westmere);
@@ -2583,6 +2602,10 @@ pmc_event_names_of_class(enum pmc_class 
 			ev = corei7uc_event_table;
 			count = PMC_EVENT_TABLE_SIZE(corei7uc);
 			break;
+		case PMC_CPU_INTEL_SANDYBRIDGE:
+			ev = sandybridgeuc_event_table;
+			count = PMC_EVENT_TABLE_SIZE(sandybridgeuc);
+			break;
 		case PMC_CPU_INTEL_WESTMERE:
 			ev = westmereuc_event_table;
 			count = PMC_EVENT_TABLE_SIZE(westmereuc);
@@ -2814,6 +2837,11 @@ pmc_init(void)
 		pmc_class_table[n++] = &corei7uc_class_table_descr;
 		PMC_MDEP_INIT_INTEL_V2(corei7);
 		break;
+	case PMC_CPU_INTEL_SANDYBRIDGE:
+		pmc_class_table[n++] = &ucf_class_table_descr;
+		pmc_class_table[n++] = &sandybridgeuc_class_table_descr;
+		PMC_MDEP_INIT_INTEL_V2(sandybridge);
+		break;
 	case PMC_CPU_INTEL_WESTMERE:
 		pmc_class_table[n++] = &ucf_class_table_descr;
 		pmc_class_table[n++] = &westmereuc_class_table_descr;
@@ -2937,6 +2965,10 @@ _pmc_name_of_event(enum pmc_event pe, en
 			ev = corei7_event_table;
 			evfence = corei7_event_table + PMC_EVENT_TABLE_SIZE(corei7);
 			break;
+		case PMC_CPU_INTEL_SANDYBRIDGE:
+			ev = sandybridge_event_table;
+			evfence = sandybridge_event_table + PMC_EVENT_TABLE_SIZE(sandybridge);
+			break;
 		case PMC_CPU_INTEL_WESTMERE:
 			ev = westmere_event_table;
 			evfence = westmere_event_table + PMC_EVENT_TABLE_SIZE(westmere);
@@ -2953,6 +2985,10 @@ _pmc_name_of_event(enum pmc_event pe, en
 			ev = corei7uc_event_table;
 			evfence = corei7uc_event_table + PMC_EVENT_TABLE_SIZE(corei7uc);
 			break;
+		case PMC_CPU_INTEL_SANDYBRIDGE:
+			ev = sandybridgeuc_event_table;
+			evfence = sandybridgeuc_event_table + PMC_EVENT_TABLE_SIZE(sandybridgeuc);
+			break;
 		case PMC_CPU_INTEL_WESTMERE:
 			ev = westmereuc_event_table;
 			evfence = westmereuc_event_table + PMC_EVENT_TABLE_SIZE(westmereuc);

Copied: stable/9/lib/libpmc/pmc.sandybridge.3 (from r232366, head/lib/libpmc/pmc.sandybridge.3)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/9/lib/libpmc/pmc.sandybridge.3	Sun Apr  8 21:29:48 2012	(r234046, copy of r232366, head/lib/libpmc/pmc.sandybridge.3)
@@ -0,0 +1,932 @@
+.\" Copyright (c) 2012 Davide Italiano <davide@FreeBSD.org>  
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd February 12, 2012
+.Dt PMC.SANDYBRIDGE 3 
+.Os 
+.Sh NAME
+.Nm pmc.sandybridge
+.Nd measurement events for 
+.Tn Intel
+.Tn Sandy Bridge
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Sandy Bridge"
+CPUs contain PMCs conforming to version 3 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs may contain up to three classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_IAP"
+.It Li PMC_CLASS_IAF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_IAP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.It Li PMC_CLASS_TSC
+These PMCs are documented in
+.Xr pmc.tsc 3 .
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling 
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Sandy Bridge PMCs are documented in 
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developers Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-039US"
+.%D May 2011
+.%Q "Intel Corporation"
+.Re
+.Ss SANDY BRIDGE FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.iaf 3 .
+.Ss SANDY BRIDGE PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes 
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li rsp= Ns Ar value
+Configure the Off-core Response bits.
+.Bl -tag -width indent
+.It Li DMND_DATA_RD
+Counts the number of demand and DCU prefetch data reads of full
+and partial cachelines as well as demand data page table entry
+cacheline reads. 
+Does not count L2 data read prefetches or instruction fetches.
+.It Li DMND_RFO
+Counts the number of demand and DCU prefetch reads for ownership
+(RFO) requests generated by a write to data cacheline. 
+Does not count L2 RFO.
+.It Li DMND_IFETCH
+Counts the number of demand and DCU prefetch instruction cacheline
+reads. 
+Does not count L2 code read prefetches.
+.It Li WB
+Counts the number of writeback (modified to exclusive) transactions.
+.It Li PF_DATA_RD
+Counts the number of data cacheline reads generated by L2 prefetchers.
+.It Li PF_RFO
+Counts the number of RFO requests generated by L2 prefetchers.
+.It Li PF_IFETCH
+Counts the number of code reads generated by L2 prefetchers.
+.It Li OTHER
+Counts one of the following transaction types, including L3 invalidate,
+I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences,
+lock, unlock, split lock.
+.It Li UNCORE_HIT
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+with no coherency actions required (snooping).
+.It Li OTHER_CORE_HIT_SNP
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+and was serviced by another core with a cross core snoop where no modified
+copies were found (clean).
+.It Li OTHER_CORE_HITM
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+and was serviced by another core with a cross core snoop where modified
+copies were found (HITM).
+.It Li REMOTE_CACHE_FWD
+L3 Miss: local homed requests that missed the L3 cache and was serviced
+by forwarded data following a cross package snoop where no modified
+copies were found.
+(Remote home requests are not counted)
+.It Li REMOTE_DRAM
+L3 Miss: remote home requests that missed the L3 cache and were serviced
+by remote DRAM.
+.It Li LOCAL_DRAM
+L3 Miss: local home requests that missed the L3 cache and were serviced
+by local DRAM.
+.It Li NON_DRAM
+Non-DRAM requests that were serviced by IOH.
+.El
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.It Li os
+Configure the PMC to count events happening at processor privilege
+level 0.
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers are specified, the default is to enable both.
+.Ss Event Specifiers (Programmable PMCs)
+Sandy Bridge programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li LD_BLOCKS.DATA_UNKNOWN
+.Pq Event 03H, Umask 01H
+Blocked loads due to store buffer blocks with unknown data.
+.It Li LD_BLOCKS.STORE_FORWARD
+.Pq Event 03H, Umask 02H 
+Loads blocked by overlapping with store buffer that cannot be forwarded.
+.It Li LD_BLOCKS.NO_SR
+.Pq Event 03H, Umask 08H 
+# of Split loads blocked due to resource not available.
+.It Li LD_BLOCKS.ALL_BLOCK 
+.Pq Event 03H, Umask 10H
+Number of cases where any load is blocked but has no DCU miss.
+.It Li  MISALIGN_MEM_REF.LOADS 
+.Pq Event 05H, Umask  01H 
+Speculative cache-line split load uops dispatched to L1D.
+.It Li MISALIGN_MEM_REF.STORES
+.Pq Event 05H, Umask  02H 
+Speculative cache-line split Store-address uops dispatched to L1D.
+.It Li LD_BLOCKS_PARTIAL.ADDRESS_ALIAS
+.Pq Event 07H, Umask  01H 
+False dependencies in MOB due to partial compare on address.
+.It Li LD_BLOCKS_PARTIAL.ALL_STA_BLOCK 
+.Pq Event 07H, Umask 08H 
+The number of times that load operations are temporarily blocked because of 
+older stores, with addresses that are not yet known. 
+A load operation may incur more than one block of this type.
+.It Li DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK
+.Pq Event 08H, Umask 01H 
+Misses in all TLB levels that cause a page walk of any page size.
+.It Li DTLB_LOAD_MISSES.WALK_COMPLETED 
+.Pq Event 08H, Umask 02H 
+Misses in all TLB levels that caused page walk completed of any size.
+.It Li DTLB_LOAD_MISSES.WALK_DURATION 
+.Pq Event 08H, Umask 04H 
+Cycle PMH is busy with a walk.
+.It Li DTLB_LOAD_MISSES.STLB_HIT
+.Pq Event 08H, Umask 10H 
+Number of cache load STLB hits. 
+No page walk.
+.It Li INT_MISC.RECOVERY_CYCLES 
+.Pq Event 0DH, Umask 03H 
+Cycles waiting to recover after Machine Clears or JEClear. 
+Set Cmask = 1.
+Set Edge to count occurrences
+.It Li INT_MISC.RAT_STALL_CYCLES 
+.Pq Event 0DH, Umask 40H 
+Cycles RAT external stall is sent to IDQ for this thread.
+.It Li UOPS_ISSUED.ANY
+.Pq Event 0EH, Umask 01H 
+Increments each cycle the # of Uops issued by the RAT to RS. 
+Set Cmask = 1, Inv = 1, Any= 1 to count stalled cycles of this core.
+Set Cmask = 1, Inv = 1 to count stalled cycles
+.It Li FP_COMP_OPS_EXE.X87 
+.Pq Event 10H, Umask 01H 
+Counts number of X87 uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_PACKED_DOUBLE 
+.Pq Event 10H, Umask 10H 
+Counts number of SSE* double precision FP packed uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_SCALAR_SINGLE
+.Pq Event 10H, Umask 20H 
+Counts number of SSE* single precision FP scalar uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_PACKED_SINGLE
+.Pq Event 10H, Umask 40H
+Counts number of SSE* single precision FP packed uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE
+.Pq Event 10H, Umask 80H
+Counts number of SSE* double precision FP scalar uops executed.
+.It Li SIMD_FP_256.PACKED_SINGLE
+.Pq Event 11H, Umask 01H 
+Counts 256-bit packed single-precision floating-point instructions.
+.It Li SIMD_FP_256.PACKED_DOUBLE
+.Pq Event 11H, Umask 02H
+Counts 256-bit packed double-precision floating-point instructions.
+.It Li ARITH.FPU_DIV_ACTIVE
+.Pq Event 14H, Umask 01H 
+Cycles that the divider is active, includes INT and FP.
+Set 'edge =1, cmask=1' to count the number of divides.
+.It Li INSTS_WRITTEN_TO_IQ.INSTS 
+.Pq Event 17H, Umask 01H 
+Counts the number of instructions written into the IQ every cycle.
+.It Li L2_RQSTS.DEMAND_DATA_RD_HIT 
+.Pq Event 24H, Umask 01H 
+Demand Data Read requests that hit L2 cache.
+.It Li L2_RQSTS.ALL_DEMAND_DATA_RD
+.Pq Event 24H, Umask 03H 
+Counts any demand and L1 HW prefetch data load requests to L2.
+.It Li L2_RQSTS.RFO_HITS
+.Pq Event 24H, Umask 04H
+Counts the number of store RFO requests that hit the L2 cache.
+.It Li L2_RQSTS.RFO_MISS 
+.Pq Event 24H, Umask 08H
+Counts the number of store RFO requests that miss the L2 cache.
+.It Li L2_RQSTS.ALL_RFO 
+.Pq Event 24H, Umask 0CH 
+Counts all L2 store RFO requests.
+.It Li L2_RQSTS.CODE_RD_HIT 
+.Pq Event 24H, Umask 10H
+Number of instruction fetches that hit the L2 cache.
+.It Li L2_RQSTS.CODE_RD_MISS 
+.Pq Event 24H, Umask 20H
+Number of instruction fetches that missed the L2 cache.
+.It Li L2_RQSTS.ALL_CODE_RD 
+.Pq Event 24H, Umask 30H 
+Counts all L2 code requests.
+.It Li L2_RQSTS.PF_HIT 
+.Pq Event 24H, Umask 40H 
+Requests from L2 Hardware prefetcher that hit L2.
+.It Li L2_RQSTS.PF_MISS 
+.Pq Event 24H, Umask 80H
+Requests from L2 Hardware prefetcher that missed L2.
+.It Li L2_RQSTS.ALL_PF 
+.Pq Event 24H, Umask C0H
+Any requests from L2 Hardware prefetchers.
+.It Li L2_STORE_LOCK_RQSTS.MISS 
+.Pq Event 27H, Umask 01H 
+RFOs that miss cache lines.
+.It Li L2_STORE_LOCK_RQSTS.HIT_E 
+.Pq Event 27H, Umask 04H 
+RFOs that hit cache lines in E state.
+.It Li L2_STORE_LOCK_RQSTS.HIT_M
+.Pq Event 27H, Umask 08H
+RFOs that hit cache lines in M state.
+.It Li L2_STORE_LOCK_RQSTS.ALL 
+.Pq Event 27H, Umask 0FH
+RFOs that access cache lines in any state.
+.It Li L2_L1D_WB_RQSTS.HIT_E 
+.Pq Event 28H, Umask 04H 
+Not rejected writebacks from L1D to L2 cache lines in E state.
+.It Li L2_L1D_WB_RQSTS.HIT_M 
+.Pq Event 28H, Umask 08H 
+Not rejected writebacks from L1D to L2 cache lines in M state.
+.It Li LONGEST_LAT_CACHE.REFERENCE 
+.Pq Event 2EH, Umask 4FH
+This event counts requests originating from the core that reference a cache 
+line in the last level cache.
+.It Li LONGEST_LAT_CACHE.MISS 
+.Pq Event 2EH, Umask 41H 
+This event counts each cache miss condition for references to the last level 
+cache.
+.It Li CPU_CLK_UNHALTED.THREAD_P 
+.Pq Event 3CH, Umask 00H 
+Counts the number of thread cycles while the thread is not in a halt state. 
+The thread enters the halt state when it is running the HLT instruction. 
+The core frequency may change from time to time due to power or thermal 
+throttling.
+.It Li CPU_CLK_THREAD_UNHALTED.REF_XCLK 
+.Pq Event 3CH, Umask 01H 
+Increments at the frequency of XCLK (100 MHz) when not halted.
+.It Li L1D_PEND_MISS.PENDING 
+.Pq Event 48H, Umask 01H 
+Increments the number of outstanding L1D misses every cycle. 
+Set Cmask = 1 and Edge =1  to count occurrences.
+Counter 2 only; Set Cmask = 1 to count cycles.
+.It Li DTLB_STORE_MISSES.MISS_CAUSES_A_WALK 
+.Pq Event 49H, Umask 01H
+Miss in all TLB levels causes a page walk of any page size (4K/2M/4M/1G).
+.It Li DTLB_STORE_MISSES.WALK_COMPLETED 
+.Pq Event 49H, Umask 02H 
+Miss in all TLB levels causes a page walk that completes of any page size 
+(4K/2M/4M/1G).
+.It Li DTLB_STORE_MISSES.WALK_DURATION 
+.Pq Event 49H, Umask 04H 
+Cycles PMH is busy with this walk.
+.It Li DTLB_STORE_MISSES.STLB_HIT 
+.Pq Event 49H, Umask 10H 
+Store operations that miss the first TLB level but hit the second and do not 
+cause page walks.
+.It Li LOAD_HIT_PRE.SW_PF 
+.Pq Event 4CH, Umask 01H 
+Not SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.
+.It Li LOAD_HIT_PRE.HW_PF
+.Pq Event 4CH, Umask 02H 
+Not SW-prefetch load dispatches that hit fill buffer allocated for H/W prefetch.
+.It Li HW_PRE_REQ.DL1_MISS 
+.Pq Event 4EH, Umask 02H 
+Hardware Prefetch requests that miss the L1D cache.  
+A request is counted each time it accesses the cache and misses it, including
+if a block is applicable or if it hits the Fill Buffer, for example.
+This accounts for both L1 streamer and IP-based (IPP) HW prefetchers.
+.It Li L1D.REPLACEMENT 
+.Pq Event 51H, Umask 01H
+Counts the number of lines brought into the L1 data cache.
+.It Li L1D.ALLOCATED_IN_M 
+.Pq Event 51H, Umask 02H 
+Counts the number of allocations of modified L1D cache lines.
+.It Li L1D.EVICTION
+.Pq Event 51H, Umask 04H
+Counts the number of modified lines evicted from the L1 data cache due to 
+replacement.
+.It Li L1D.ALL_M_REPLACEMENT
+.Pq Event 51H, Umask 08H 
+Cache lines in M state evicted out of L1D due to Snoop HitM or dirty line 
+replacement.
+.It Li PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP
+.Pq Event 59H, Umask 20H 
+Increments the number of flags-merge uops in flight each cycle.
+Set Cmask = 1 to count cycles.
+.It Li PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW
+.Pq Event 59H, Umask 40H 
+Cycles with at least one slow LEA uop allocated.
+.It Li PARTIAL_RAT_STALLS.MUL_SINGLE_UOP
+.Pq Event 59H, Umask 80H 
+Number of Multiply packed/scalar single precision uops allocated.
+.It Li RESOURCE_STALLS2.ALL_FL_EMPTY 
+.Pq Event 5BH, Umask 0CH 
+Cycles stalled due to free list empty.
+.It Li RESOURCE_STALLS2.ALL_PRF_CONTROL 
+.Pq Event 5BH, Umask 0FH 
+Cycles stalled due to control structures full for physical registers.
+.It Li RESOURCE_STALLS2.BOB_FULL 
+.Pq Event 5BH, Umask 40H 
+Cycles Allocator is stalled due to Branch Order Buffer.
+.It Li RESOURCE_STALLS2.OOO_RSRC
+.Pq Event 5BH, Umask 4FH 
+Cycles stalled due to out of order resources full.
+.It Li CPL_CYCLES.RING0
+.Pq Event 5CH, Umask 01H 
+Unhalted core cycles when the thread is in ring 0.
+Use Edge to count transition
+.It Li CPL_CYCLES.RING123
+.Pq Event 5CH, Umask 02H
+Unhalted core cycles when the thread is not in ring 0.
+.It Li RS_EVENTS.EMPTY_CYCLES
+.Pq Event 5EH, Umask 01H 
+Cycles the RS is empty for the thread.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD
+.Pq Event 60H, Umask 01H 
+Offcore outstanding Demand Data Read transactions in SQ to uncore.
+Set Cmask=1 to count cycles.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO
+.Pq Event 60H, Umask 04H 
+Offcore outstanding RFO store transactions in SQ to uncore.
+Set Cmask=1 to count cycles.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD
+.Pq Event 60H, Umask 08H 
+Offcore outstanding cacheable data read transactions in SQ to uncore.
+Set Cmask=1 to count cycles.
+.It Li LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION
+.Pq Event 63H, Umask 01H 
+Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.
+.It Li LOCK_CYCLES.CACHE_LOCK_DURATION
+.Pq Event 63H, Umask 02H
+Cycles in which the L1D is locked.
+.It Li IDQ.EMPTY
+.Pq Event 79H, Umask 02H 
+Counts cycles the IDQ is empty.
+.It Li IDQ.MITE_UOPS
+.Pq Event 79H, Umask 04H 
+Increment each cycle # of uops delivered to IDQ from MITE path. 
+Set Cmask = 1 to count cycles.
+Can combine Umask 04H and 20H
+.It Li IDQ.DSB_UOPS
+.Pq Event 79H, Umask 08H 
+Increment each cycle. 
+# of uops delivered to IDQ from DSB path. 
+Set Cmask = 1 to count cycles.
+Can combine Umask 08H and 10H
+.It Li IDQ.MS_DSB_UOPS 
+.Pq Event 79H, Umask 10H 
+Increment each cycle # of uops delivered to IDQ when MS busy by DSB. 
+Set Cmask = 1 to count cycles MS is busy. 
+Set Cmask=1 and Edge=1 to count MS activations.
+Can combine Umask 08H and 10H
+.It Li IDQ.MS_MITE_UOPS
+.Pq Event 79H, Umask 20H 
+Increment each cycle # of uops delivered to IDQ when MS is busy by MITE. 
+Set Cmask = 1 to count cycles.
+Can combine Umask 04H and 20H
+.It Li IDQ.MS_UOPS 
+.Pq Event 79H, Umask 30H 
+Increment each cycle # of uops delivered to IDQ from MS by either DSB or MITE. 
+Set Cmask = 1 to count cycles.
+Can combine Umask 04H, 08H and 30H
+.It Li ICACHE.MISSES
+.Pq Event 80H, Umask 02H
+Number of Instruction Cache, Streaming Buffer and Victim Cache Misses.
+Includes UC accesses.
+.It Li ITLB_MISSES.MISS_CAUSES_A_WALK
+.Pq Event 85H, Umask 01H
+Misses in all ITLB levels that cause page walks.
+.It Li ITLB_MISSES.WALK_COMPLETED
+.Pq Event 85H, Umask 02H 
+Misses in all ITLB levels that cause completed page walks.
+.It Li ITLB_MISSES.WALK_DURATION 
+.Pq Event 85H, Umask 04H 
+Cycle PMH is busy with a walk.
+.It Li ITLB_MISSES.STLB_HIT
+.Pq Event 85H, Umask 10H 
+Number of cache load STLB hits. 
+No page walk.
+.It Li ILD_STALL.LCP
+.Pq Event 87H, Umask 01H 
+Stalls caused by changing prefix length of the instruction.
+.It Li ILD_STALL.IQ_FULL
+.Pq Event 87H, Umask 04H 
+Stall cycles due to IQ is full.
+.It Li BR_INST_EXEC.COND
+.Pq Event 88H, Umask 01H 
+Qualify conditional near branch instructions executed, but not necessarily 
+retired.
+Must combine with umask 40H, 80H
+.It Li BR_INST_EXEC.DIRECT_JMP 
+.Pq Event 88H, Umask 02H 
+Qualify all unconditional near branch instructions excluding calls and indirect 
+branches.
+Must combine with umask 80H
+.It Li BR_INST_EXEC.INDIRECT_JMP_NON_CALL_RET
+.Pq Event 88H, Umask 04H 
+Qualify executed indirect near branch instructions that are not calls nor 
+returns.
+Must combine with umask 80H
+.It Li BR_INST_EXEC.RETURN_NEAR
+.Pq Event 88H, Umask 08H 
+Qualify indirect near branches that have a return mnemonic.
+Must combine with umask 80H
+.It Li BR_INST_EXEC.DIRECT_NEAR_CALL
+.Pq Event 88H, Umask 10H
+Qualify unconditional near call branch instructions, excluding non call branch,
+executed.
+Must combine with umask 80H
+.It Li BR_INST_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 88H, Umask 20H 
+Qualify indirect near calls, including both register and memory indirect, 
+executed.
+Must combine with umask 80H
+.It Li BR_INST_EXEC.NONTAKEN
+.Pq Event 88H, Umask 40H 
+Qualify non-taken near branches executed.
+Applicable to umask 01H only
+.It Li BR_INST_EXEC.TAKEN
+.Pq Event 88H, Umask 80H 
+Qualify taken near branches executed.  
+Must combine with 01H,02H, 04H, 08H, 10H, 20H
+.It Li BR_INST_EXEC.ALL_BRANCHES
+.Pq Event 88H, Umask FFH 
+Counts all near executed branches (not necessarily retired).
+.It Li BR_MISP_EXEC.COND
+.Pq Event 89H, Umask 01H 
+Qualify conditional near branch instructions mispredicted. 
+Must combine with umask 40H, 80H
+.It Li BR_MISP_EXEC.INDIRECT_JMP_NON_CALL_RET
+.Pq Event 89H, Umask 04H 
+Qualify mispredicted indirect near branch instructions that are not calls nor 
+returns.
+Must combine with umask 80H
+.It Li BR_MISP_EXEC.RETURN_NEAR 
+.Pq Event 89H, Umask 08H 
+Qualify mispredicted indirect near branches that have a return mnemonic.
+Must combine with umask 80H
+.It Li BR_MISP_EXEC.DIRECT_NEAR_CALL
+.Pq Event 89H, Umask 10H 
+Qualify mispredicted unconditional near call branch instructions, excluding non 
+call branch, executed.
+Must combine with umask 80H
+.It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 89H, Umask 20H 
+Qualify mispredicted indirect near calls, including both register and memory 
+indirect, executed.
+Must combine with umask 80H
+.It Li BR_MISP_EXEC.NONTAKEN
+.Pq Event 89H, Umask 40H 
+Qualify mispredicted non-taken near branches executed.
+Applicable to umask 01H only
+.It Li BR_MISP_EXEC.TAKEN 
+.Pq Event 89H, Umask 80H
+Qualify mispredicted taken near branches executed.
+Must combine with 01H,02H, 04H, 08H, 10H, 20H
+.It Li BR_MISP_EXEC.ALL_BRANCHES
+.Pq Event 89H, Umask FFH
+Counts all near executed branches (not necessarily retired).
+.It Li IDQ_UOPS_NOT_DELIVERED.CORE
+.Pq Event 9CH, Umask 01H 
+Count number of non-delivered uops to RAT per thread.
+Use Cmask to qualify uop b/w
+.It Li UOPS_DISPATCHED_PORT.PORT_0
+.Pq Event A1H, Umask 01H
+Cycles which a Uop is dispatched on port 0.
+.It Li UOPS_DISPATCHED_PORT.PORT_1
+.Pq Event A1H, Umask 02H
+Cycles which a Uop is dispatched on port 1.
+.It Li UOPS_DISPATCHED_PORT.PORT_2_LD
+.Pq Event A1H, Umask 04H 
+Cycles which a load uop is dispatched on port 2.
+.It Li UOPS_DISPATCHED_PORT.PORT_2_STA
+.Pq Event A1H, Umask 08H
+Cycles which a store address uop is dispatched on port 2.
+.It Li UOPS_DISPATCHED_PORT.PORT_2
+.Pq Event A1H, Umask 0CH 
+Cycles which a Uop is dispatched on port 2.
+.It Li UOPS_DISPATCHED_PORT.PORT_3_LD
+.Pq Event A1H, Umask 10H 
+Cycles which a load uop is dispatched on port 3.
+.It Li UOPS_DISPATCHED_PORT.PORT_3_STA 
+.Pq Event A1H, Umask 20H 
+Cycles which a store address uop is dispatched on port 3.
+.It Li UOPS_DISPATCHED_PORT.PORT_3 
+.Pq Event A1H, Umask 30H 
+Cycles which a Uop is dispatched on port 3.
+.It Li UOPS_DISPATCHED_PORT.PORT_4 
+.Pq Event A1H, Umask 40H
+Cycles which a Uop is dispatched on port 4.
+.It Li UOPS_DISPATCHED_PORT.PORT_5
+.Pq Event A1H, Umask 80H 
+Cycles which a Uop is dispatched on port 5.
+.It Li RESOURCE_STALLS.ANY
+.Pq Event A2H, Umask 01H
+Cycles Allocation is stalled due to Resource Related reason.
+.It Li RESOURCE_STALLS.LB
+.Pq Event A2H, Umask 02H
+Counts the cycles of stall due to lack of load buffers.
+.It Li RESOURCE_STALLS.RS
+.Pq Event A2H, Umask 04H 
+Cycles stalled due to no eligible RS entry available.
+.It Li RESOURCE_STALLS.SB
+.Pq Event A2H, Umask 08H 
+Cycles stalled due to no store buffers available. 
+(not including draining from sync)
+.It Li RESOURCE_STALLS.ROB
+.Pq Event A2H, Umask 10H
+Cycles stalled due to re-order buffer full.
+.It Li RESOURCE_STALLS.FCSW
+.Pq Event A2H, Umask 20H 
+Cycles stalled due to writing the FPU control word.
+.It Li RESOURCE_STALLS.MXCSR
+.Pq Event A2H, Umask 40H
+Cycles stalled due to the MXCSR register rename occurring too close to a previous
+MXCSR rename.
+.It Li RESOURCE_STALLS.OTHER
+.Pq Event A2H, Umask 80H 
+Cycles stalled while execution was stalled due to other resource issues.
+.It Li DSB2MITE_SWITCHES.COUNT
+.Pq Event ABH, Umask 01H
+Number of DSB to MITE switches.
+.It Li DSB2MITE_SWITCHES.PENALTY_CYCLES
+.Pq Event ABH, Umask 02H 
+Cycles DSB to MITE switches caused delay.
+.It Li DSB_FILL.OTHER_CANCEL
+.Pq Event ACH, Umask 02H
+Cases of cancelling valid DSB fill not because of exceeding way limit.
+.It Li DSB_FILL.EXCEED_DSB_LINES
+.Pq Event ACH, Umask 08H
+DSB Fill encountered > 3 DSB lines.
+.It Li DSB_FILL.ALL_CANCEL
+.Pq Event ACH, Umask 0AH 
+Cases of cancelling valid Decode Stream Buffer (DSB) fill not because of exceeding 
+way limit.
+.It Li ITLB.ITLB_FLUSH
+.Pq Event AEH, Umask 01H
+Counts the number of ITLB flushes, includes 4k/2M/4M pages.
+.It Li OFFCORE_REQUESTS.DEMAND_DATA_RD
+.Pq Event B0H, Umask 01H 
+Demand data read requests sent to uncore.
+.It Li OFFCORE_REQUESTS.DEMAND_RFO 
+.Pq Event B0H, Umask 04H 
+Demand RFO read requests sent to uncore, including regular RFOs, locks, ItoM.
+.It Li OFFCORE_REQUESTS.ALL_DATA_RD
+.Pq Event B0H, Umask 08H 
+Data read requests sent to uncore (demand and prefetch).
+.It Li UOPS_DISPATCHED.THREAD
+.Pq Event B1H, Umask 01H 
+Counts total number of uops to be dispatched per-thread each cycle.
+Set Cmask = 1, INV =1 to count stall cycles.
+.It Li UOPS_DISPATCHED.CORE
+.Pq Event B1H, Umask 02H 
+Counts total number of uops to be dispatched per-core each cycle.
+Do not need to set ANY
+.It Li OFFCORE_REQUESTS_BUFFER.SQ_FULL
+.Pq Event B2H, Umask 01H 
+Offcore requests buffer cannot take more entries for this thread core.
+.It Li AGU_BYPASS_CANCEL.COUNT
+.Pq Event B6H, Umask 01H 
+Counts executed load operations with all the following traits: 1. addressing 
+of the format [base + offset], 2. the offset is between 1 and 2047, 3. the 
+address specified in the base register is in one page and the address 
+[base+offset] is in another page.
+.It Li OFF_CORE_RESPONSE_0 
+.Pq Event B7H, Umask 01H
+Off-core Response Performance Monitoring.
+PMC0 only.
+Requires programming MSR 01A6H.
+.It Li OFF_CORE_RESPONSE_1 
+.Pq Event BBH, Umask 01H
+Off-core Response Performance Monitoring.
+PMC3 only.
+Requires programming MSR 01A7H.
+.It Li TLB_FLUSH.DTLB_THREAD
+.Pq Event BDH, Umask 01H
+DTLB flush attempts of the thread-specific entries.
+.It Li TLB_FLUSH.STLB_ANY
+.Pq Event BDH, Umask 20H 
+Count number of STLB flush attempts.
+.It Li L1D_BLOCKS.BANK_CONFLICT_CYCLES
+.Pq Event BFH, Umask 05H 
+Cycles when dispatched loads are cancelled due to L1D bank conflicts with other
+load ports.
+Use cmask=1.
+.It Li INST_RETIRED.ANY_P
+.Pq Event C0H, Umask 00H 
+Number of instructions at retirement.
+.It Li INST_RETIRED.PREC_DIST
+.Pq Event C0H, Umask 01H
+Precise instruction retired event with HW to reduce effect of PEBS shadow in IP
+distribution.
+PMC1 only; must quiesce other PMCs.
+.It Li INST_RETIRED.X87
+.Pq Event C0H, Umask 02H 
+X87 instruction retired event.
+.It Li OTHER_ASSISTS.ITLB_MISS_RETIRED
+.Pq Event C1H, Umask 02H 
+Instructions that experienced an ITLB miss.
+.It Li OTHER_ASSISTS.AVX_STORE
+.Pq Event C1H, Umask 08H  
+Number of assists associated with 256-bit AVX store operations.
+.It Li OTHER_ASSISTS.AVX_TO_SSE
+.Pq Event C1H, Umask 10H 
+Number of transitions from AVX256 to legacy SSE when penalty applicable.
+.It Li OTHER_ASSISTS.SSE_TO_AVX
+.Pq Event C1H, Umask 20H 
+Number of transitions from SSE to AVX-256 when penalty applicable.
+.It Li UOPS_RETIRED.ALL
+.Pq Event C2H, Umask 01H 
+Counts the number of micro-ops retired.
+Use cmask=1 and invert to count active cycles or stalled cycles.
+.It Li UOPS_RETIRED.RETIRE_SLOTS
+.Pq Event C2H, Umask 02H 
+Counts the number of retirement slots used each cycle.
+.It Li MACHINE_CLEARS.MEMORY_ORDERING
+.Pq Event C3H, Umask 02H 
+Counts the number of machine clears due to memory order conflicts.
+.It Li MACHINE_CLEARS.SMC 
+.Pq Event C3H, Umask 04H
+Counts the number of times that a program writes to a code section.
+.It Li MACHINE_CLEARS.MASKMOV
+.Pq Event C3H, Umask 20H
+Counts the number of executed AVX masked load operations that refer to an 
+illegal address range with the mask bits set to 0.
+.It Li BR_INST_RETIRED.ALL_BRANCH
+.Pq Event C4H, Umask 00H
+Branch instructions at retirement.
+.It Li BR_INST_RETIRED.CONDITIONAL
+.Pq Event C4H, Umask 01H 
+Counts the number of conditional branch instructions retired.
+.It Li BR_INST_RETIRED.NEAR_CALL
+.Pq Event C4H, Umask 02H 
+Direct and indirect near call instructions retired.
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H, Umask 04H
+Counts the number of branch instructions retired.
+.It Li BR_INST_RETIRED.NEAR_RETURN
+.Pq Event C4H, Umask 08H 
+Counts the number of near return instructions retired.
+.It Li BR_INST_RETIRED.NOT_TAKEN
+.Pq Event C4H, Umask 10H 
+Counts the number of not taken branch instructions retired.
+.It Li BR_INST_RETIRED.NEAR_TAKEN
+.Pq Event C4H, Umask 20H 
+Number of near taken branches retired.
+.It Li BR_INST_RETIRED.FAR_BRANCH
+.Pq Event C4H, Umask 40H 
+Number of far branches retired.
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H, Umask 00H 
+Mispredicted branch instructions at retirement.
+.It Li BR_MISP_RETIRED.CONDITIONAL
+.Pq Event C5H, Umask 01H 
+Mispredicted conditional branch instructions retired.
+.It Li BR_MISP_RETIRED.NEAR_CALL
+.Pq Event C5H, Umask 02H 
+Direct and indirect mispredicted near call instructions retired.
+.It Li BR_MISP_RETIRED.ALL_BRANCH
+.Pq Event C5H, Umask 04H 
+Mispredicted macro branch instructions retired.
+.It Li BR_MISP_RETIRED.NOT_TAKEN
+.Pq Event C5H, Umask 10H 
+Mispredicted not taken branch instructions retired.
+.It Li BR_MISP_RETIRED.TAKEN
+.Pq Event C5H, Umask 20H 
+Mispredicted taken branch instructions retired.
+.It Li FP_ASSIST.X87_OUTPUT
+.Pq Event CAH, Umask 02H 
+Number of X87 assists due to output value.
+.It Li FP_ASSIST.X87_INPUT
+.Pq Event CAH, Umask 04H
+Number of X87 assists due to input value.
+.It Li FP_ASSIST.SIMD_OUTPUT
+.Pq Event CAH, Umask 08H
+Number of SIMD FP assists due to output values.
+.It Li FP_ASSIST.SIMD_INPUT
+.Pq Event CAH, Umask 10H 
+Number of SIMD FP assists due to input values.
+.It Li FP_ASSIST.ANY
+.Pq Event CAH, Umask 1EH 
+Cycles with any input/output SSE* or FP assists.
+.It Li ROB_MISC_EVENTS.LBR_INSERTS
+.Pq Event CCH, Umask 20H 
+Count cases of saving new LBR records by hardware.
+.It Li MEM_TRANS_RETIRED.LOAD_LATENCY
+.Pq Event CDH, Umask 01H
+Sample loads with specified latency threshold. 
+PMC3 only. 
+Specify threshold in MSR 0x3F6.
+.It Li MEM_TRANS_RETIRED.PRECISE_STORE
+.Pq Event CDH, Umask 02H 
+Sample stores and collect precise store operation via PEBS record. 
+PMC3 only.
+.It Li MEM_UOP_RETIRED.LOADS
+.Pq Event D0H, Umask 01H
+Qualify retired memory uops that are loads. 
+Combine with umask 10H, 20H, 40H, 80H.
+.It Li MEM_UOP_RETIRED.STORES
+.Pq Event D0H, Umask 02H
+Qualify retired memory uops that are stores. 
+Combine with umask 10H, 20H, 40H, 80H.
+.It Li MEM_UOP_RETIRED.STLB_MISS
+.Pq Event D0H, Umask 10H
+Qualify retired memory uops with STLB miss.
+Must combine with umask 01H, 02H, to produce counts.
+.It Li MEM_UOP_RETIRED.LOCK
+.Pq Event D0H, Umask 20H
+Qualify retired memory uops with lock.
+Must combine with umask 01H, 02H, to produce counts.
+.It Li MEM_UOP_RETIRED.SPLIT
+.Pq Event D0H, Umask 40H
+Qualify retired memory uops with line split.
+Must combine with umask 01H, 02H, to produce counts. 
+.It Li MEM_UOP_RETIRED.ALL
+.Pq Event D0H, Umask 80H 
+Qualify any retired memory uops.
+Must combine with umask 01H, 02H, to produce counts.
+.It Li MEM_LOAD_UOPS_RETIRED.L1_HIT
+.Pq Event D1H, Umask 01H 
+Retired load uops with L1 cache hits as data sources.
+Must combine with umask 01H, 02H, to produce counts.
+.It Li MEM_LOAD_UOPS_RETIRED.L2_HIT
+.Pq Event D1H, Umask 02H 
+Retired load uops with L2 cache hits as data sources.
+.It Li MEM_LOAD_UOPS_RETIRED.LLC_HIT
+.Pq Event D1H, Umask 04H 
+Retired load uops which data sources were data hits in LLC without snoops 
+required.
+.It Li MEM_LOAD_UOPS_RETIRED.HIT_LFB
+.Pq Event D1H, Umask 40H 
+Retired load uops which data sources were load uops missed L1 but hit FB due 
+to preceding miss to the same cache line with data not ready.
+.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS
+.Pq Event D2H, Umask 01H 
+Retired load uops which data sources were LLC hit and cross-core snoop missed in 
+on-pkg core cache.
+.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT
+.Pq Event D2H, Umask 02H 
+Retired load uops which data sources were LLC and cross-core snoop hits in 
+on-pkg core cache.
+.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM
+.Pq Event D2H, Umask 04H 
+Retired load uops which data sources were HitM responses from shared LLC.
+.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE
+.Pq Event D2H, Umask 08H
+Retired load uops which data sources were hits in LLC without snoops required.
+.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.LLC_MISS
+.Pq Event D4H, Umask 02H 
+Retired load uops with unknown information as data source in cache serviced the load.
+.It Li L2_TRANS.DEMAND_DATA_RD
+.Pq Event F0H, Umask 01H 
+Demand Data Read requests that access L2 cache.
+.It Li L2_TRANS.RFO
+.Pq Event F0H, Umask 02H 
+RFO requests that access L2 cache.
+.It Li L2_TRANS.CODE_RD
+.Pq Event F0H, Umask 04H 
+L2 cache accesses when fetching instructions.
+.It Li L2_TRANS.ALL_PF
+.Pq Event F0H, Umask 08H 
+L2 or LLC HW prefetches that access L2 cache.
+.It Li L2_TRANS.L1D_WB
+.Pq Event F0H, Umask 10H 
+L1D writebacks that access L2 cache.
+.It Li L2_TRANS.L2_FILL
+.Pq Event F0H, Umask 20H 
+L2 fill requests that access L2 cache.
+.It Li L2_TRANS.L2_WB
+.Pq Event F0H, Umask 40H 
+L2 writebacks that access L2 cache.
+.It Li L2_TRANS.ALL_REQUESTS
+.Pq Event F0H, Umask 80H 
+Transactions accessing L2 pipe.
+.It Li L2_LINES_IN.I
+.Pq Event F1H, Umask 01H
+L2 cache lines in I state filling L2. 
+Counting does not cover rejects.
+.It Li L2_LINES_IN.S
+.Pq Event F1H, Umask 02H 
+L2 cache lines in S state filling L2. 
+Counting does not cover rejects.
+.It Li L2_LINES_IN.E
+.Pq Event F1H, Umask 04H 
+L2 cache lines in E state filling L2. 
+Counting does not cover rejects.
+.It Li L2_LINES_IN.ALL
+.Pq Event F1H, Umask 07H 
+L2 cache lines filling L2. 

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201204082129.q38LTmvH029157>