Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 19 Oct 2012 17:01:28 +0000 (UTC)
From:      Sean Bruno <sbruno@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r241738 - in head: lib/libpmc sys/dev/hwpmc sys/sys
Message-ID:  <201210191701.q9JH1S5G037802@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: sbruno
Date: Fri Oct 19 17:01:27 2012
New Revision: 241738
URL: http://svn.freebsd.org/changeset/base/241738

Log:
  Update hwpmc to support the Xeon class of Sandybridge processors.
  (Model 0x2D     /* Per Intel document 253669-044US 08/2012. */)
  
  Add manpage to document all the goodness that is available in this
  processor model.
  
  No support for uncore events at this time.
  
  Submitted by:	hiren panchasara <hiren.panchasara@gmail.com>
  Reviewed by:	jimharris@ fabient@
  Obtained from:	Yahoo! Inc.
  MFC after:	  2 weeks

Added:
  head/lib/libpmc/pmc.sandybridgexeon.3   (contents, props changed)
Modified:
  head/lib/libpmc/Makefile
  head/lib/libpmc/libpmc.c
  head/sys/dev/hwpmc/hwpmc_core.c
  head/sys/dev/hwpmc/hwpmc_intel.c
  head/sys/dev/hwpmc/pmc_events.h
  head/sys/sys/pmc.h

Modified: head/lib/libpmc/Makefile
==============================================================================
--- head/lib/libpmc/Makefile	Fri Oct 19 14:49:42 2012	(r241737)
+++ head/lib/libpmc/Makefile	Fri Oct 19 17:01:27 2012	(r241738)
@@ -39,6 +39,7 @@ MAN+=	pmc.corei7.3
 MAN+=	pmc.corei7uc.3
 MAN+=	pmc.sandybridge.3
 MAN+=	pmc.sandybridgeuc.3	
+MAN+=	pmc.sandybridgexeon.3	
 MAN+=	pmc.westmere.3
 MAN+=	pmc.westmereuc.3
 MAN+=	pmc.tsc.3

Modified: head/lib/libpmc/libpmc.c
==============================================================================
--- head/lib/libpmc/libpmc.c	Fri Oct 19 14:49:42 2012	(r241737)
+++ head/lib/libpmc/libpmc.c	Fri Oct 19 17:01:27 2012	(r241738)
@@ -193,6 +193,11 @@ static const struct pmc_event_descr sand
 	__PMC_EV_ALIAS_SANDYBRIDGE()
 };
 
+static const struct pmc_event_descr sandybridge_xeon_event_table[] = 
+{
+	__PMC_EV_ALIAS_SANDYBRIDGE_XEON()
+};
+
 static const struct pmc_event_descr westmere_event_table[] =
 {
 	__PMC_EV_ALIAS_WESTMERE()
@@ -229,6 +234,7 @@ PMC_MDEP_TABLE(core2, IAP, PMC_CLASS_SOF
 PMC_MDEP_TABLE(corei7, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
 PMC_MDEP_TABLE(ivybridge, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(sandybridge, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
+PMC_MDEP_TABLE(sandybridge_xeon, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(westmere, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
 PMC_MDEP_TABLE(k7, K7, PMC_CLASS_SOFT, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(k8, K8, PMC_CLASS_SOFT, PMC_CLASS_TSC);
@@ -267,6 +273,7 @@ PMC_CLASS_TABLE_DESC(core2, IAP, core2, 
 PMC_CLASS_TABLE_DESC(corei7, IAP, corei7, iap);
 PMC_CLASS_TABLE_DESC(ivybridge, IAP, ivybridge, iap);
 PMC_CLASS_TABLE_DESC(sandybridge, IAP, sandybridge, iap);
+PMC_CLASS_TABLE_DESC(sandybridge_xeon, IAP, sandybridge_xeon, iap);
 PMC_CLASS_TABLE_DESC(westmere, IAP, westmere, iap);
 PMC_CLASS_TABLE_DESC(ucf, UCF, ucf, ucf);
 PMC_CLASS_TABLE_DESC(corei7uc, UCP, corei7uc, ucp);
@@ -572,6 +579,8 @@ static struct pmc_event_alias core2_alia
 #define ivybridge_aliases_without_iaf	core2_aliases_without_iaf
 #define sandybridge_aliases		core2_aliases
 #define sandybridge_aliases_without_iaf	core2_aliases_without_iaf
+#define sandybridge_xeon_aliases	core2_aliases
+#define sandybridge_xeon_aliases_without_iaf	core2_aliases_without_iaf
 #define westmere_aliases		core2_aliases
 #define westmere_aliases_without_iaf	core2_aliases_without_iaf
 
@@ -691,7 +700,7 @@ static struct pmc_masks iap_rsp_mask_i7_
 	NULLMASK
 };
 
-static struct pmc_masks iap_rsp_mask_sb_ib[] = {
+static struct pmc_masks iap_rsp_mask_sb_sbx_ib[] = {
 	PMCMASK(REQ_DMND_DATA_RD,	(1ULL <<  0)),
 	PMCMASK(REQ_DMND_RFO,		(1ULL <<  1)),
 	PMCMASK(REQ_DMND_IFETCH,	(1ULL <<  2)),
@@ -797,9 +806,10 @@ iap_allocate_pmc(enum pmc_event pe, char
 			} else
 				return (-1);
 		} else if (cpu_info.pm_cputype == PMC_CPU_INTEL_SANDYBRIDGE ||
-		    cpu_info.pm_cputype == PMC_CPU_INTEL_IVYBRIDGE) {
+		    cpu_info.pm_cputype == PMC_CPU_INTEL_SANDYBRIDGE_XEON ||
+			cpu_info.pm_cputype == PMC_CPU_INTEL_IVYBRIDGE) {
 			if (KWPREFIXMATCH(p, IAP_KW_RSP "=")) {
-				n = pmc_parse_mask(iap_rsp_mask_sb_ib, p, &rsp);
+				n = pmc_parse_mask(iap_rsp_mask_sb_sbx_ib, p, &rsp);
 			} else
 				return (-1);
 		} else
@@ -2678,6 +2688,10 @@ pmc_event_names_of_class(enum pmc_class 
 			ev = sandybridge_event_table;
 			count = PMC_EVENT_TABLE_SIZE(sandybridge);
 			break;
+		case PMC_CPU_INTEL_SANDYBRIDGE_XEON:
+			ev = sandybridge_xeon_event_table;
+			count = PMC_EVENT_TABLE_SIZE(sandybridge_xeon);
+			break;
 		case PMC_CPU_INTEL_WESTMERE:
 			ev = westmere_event_table;
 			count = PMC_EVENT_TABLE_SIZE(westmere);
@@ -2974,6 +2988,9 @@ pmc_init(void)
 		pmc_class_table[n++] = &sandybridgeuc_class_table_descr;
 		PMC_MDEP_INIT_INTEL_V2(sandybridge);
 		break;
+	case PMC_CPU_INTEL_SANDYBRIDGE_XEON:
+		PMC_MDEP_INIT_INTEL_V2(sandybridge_xeon);
+		break;
 	case PMC_CPU_INTEL_WESTMERE:
 		pmc_class_table[n++] = &ucf_class_table_descr;
 		pmc_class_table[n++] = &westmereuc_class_table_descr;
@@ -3112,6 +3129,10 @@ _pmc_name_of_event(enum pmc_event pe, en
 			ev = sandybridge_event_table;
 			evfence = sandybridge_event_table + PMC_EVENT_TABLE_SIZE(sandybridge);
 			break;
+		case PMC_CPU_INTEL_SANDYBRIDGE_XEON:
+			ev = sandybridge_xeon_event_table;
+			evfence = sandybridge_xeon_event_table + PMC_EVENT_TABLE_SIZE(sandybridge_xeon);
+			break;
 		case PMC_CPU_INTEL_WESTMERE:
 			ev = westmere_event_table;
 			evfence = westmere_event_table + PMC_EVENT_TABLE_SIZE(westmere);

Added: head/lib/libpmc/pmc.sandybridgexeon.3
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/lib/libpmc/pmc.sandybridgexeon.3	Fri Oct 19 17:01:27 2012	(r241738)
@@ -0,0 +1,1023 @@
+.\" Copyright (c) 2012 Hiren Panchasara <hiren.panchasara@gmail.com>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd October 18, 2012
+.Dt PMC.SANDYBRIDGEXEON 3
+.Os
+.Sh NAME
+.Nm pmc.sandybridgexeon
+.Nd measurement events for
+.Tn Intel
+.Tn Sandy Bridge Xeon
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Sandy Bridge Xeon"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs may contain up to two classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_IAP"
+.It Li PMC_CLASS_IAF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_IAP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Sandy Bridge Xeon PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-044US"
+.%D August 2012
+.%Q "Intel Corporation"
+.Re
+.Ss SANDYBRIDGE XEON FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.iaf 3 .
+.Ss SANDYBRIDGE XEON PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li rsp= Ns Ar value
+Configure the Off-core Response bits.
+.Bl -tag -width indent
+.It Li REQ_DMND_DATA_RD
+Counts the number of demand and DCU prefetch data reads of full and partial
+cachelines as well as demand data page table entry cacheline reads. Does not
+count L2 data read prefetches or instruction fetches.
+.It Li REQ_DMND_RFO
+Counts the number of demand and DCU prefetch reads for ownership (RFO)
+requests generated by a write to data cacheline. Does not count L2 RFO
+prefetches.
+.It Li REQ_DMND_IFETCH
+Counts the number of demand and DCU prefetch instruction cacheline reads.
+Does not count L2 code read prefetches.
+.It Li REQ_WB
+Counts the number of writeback (modified to exclusive) transactions.
+.It Li REQ_PF_DATA_RD
+Counts the number of data cacheline reads generated by L2 prefetchers.
+.It Li REQ_PF_RFO
+Counts the number of RFO requests generated by L2 prefetchers.
+.It Li REQ_PF_IFETCH
+Counts the number of code reads generated by L2 prefetchers.
+.It Li REQ_PF_LLC_DATA_RD
+L2 prefetcher to L3 for loads.
+.It Li REQ_PF_LLC_RFO
+RFO requests generated by L2 prefetcher
+.It Li REQ_PF_LLC_IFETCH
+L2 prefetcher to L3 for instruction fetches.
+.It Li REQ_BUS_LOCKS
+Bus lock and split lock requests.
+.It Li REQ_STRM_ST
+Streaming store requests.
+.It Li REQ_OTHER
+Any other request that crosses IDI, including I/O.
+.It Li RES_ANY
+Catch all value for any response types.
+.It Li RES_SUPPLIER_NO_SUPP
+No Supplier Information available.
+.It Li RES_SUPPLIER_LLC_HITM
+M-state initial lookup stat in L3.
+.It Li RES_SUPPLIER_LLC_HITE
+E-state.
+.It Li RES_SUPPLIER_LLC_HITS
+S-state.
+.It Li RES_SUPPLIER_LLC_HITF
+F-state.
+.It Li RES_SUPPLIER_LOCAL
+Local DRAM Controller.
+.It Li RES_SNOOP_SNPI_NONE
+No details on snoop-related information.
+.It Li RES_SNOOP_SNP_NO_NEEDED
+No snoop was needed to satisfy the request.
+.It Li RES_SNOOP_SNP_MISS
+A snoop was needed and it missed all snooped caches:
+-For LLC Hit, RspIHitI was returned by all cores
+-For LLC Miss, RspI was returned by all sockets and data was returned from
+DRAM.
+.It Li RES_SNOOP_HIT_NO_FWD
+A snoop was needed and it hits in at least one snooped cache. Hit denotes a
+cache-line was valid before snoop effect. This includes:
+-Snoop Hit w/ Invalidation (LLC Hit, RFO)
+-Snoop Hit, Left Shared (LLC Hit/Miss, IFetch/Data_RD)
+-Snoop Hit w/ Invalidation and No Forward (LLC Miss, RFO Hit S)
+In the LLC Miss case, data is returned from DRAM.
+.It Li RES_SNOOP_HIT_FWD
+A snoop was needed and data was forwarded from a remote socket.
+This includes:
+-Snoop Forward Clean, Left Shared (LLC Hit/Miss, IFetch/Data_RD/RFT).
+.It Li RES_SNOOP_HITM
+A snoop was needed and it HitM-ed in local or remote cache. HitM denotes a
+cache-line was in modified state before effect as a result of snoop. This
+includes:
+-Snoop HitM w/ WB (LLC miss, IFetch/Data_RD)
+-Snoop Forward Modified w/ Invalidation (LLC Hit/Miss, RFO)
+-Snoop MtoS (LLC Hit, IFetch/Data_RD).
+.It Li RES_NON_DRAM
+Target was non-DRAM system address. This includes MMIO transactions.
+.El
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.It Li os
+Configure the PMC to count events happening at processor privilege
+level 0.
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers are specified, the default is to enable both.
+.Ss Event Specifiers (Programmable PMCs)
+Sandy Bridge Xeon programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li LD_BLOCKS.DATA_UNKNOWN
+.Pq Event 03H , Umask 01H
+blocked loads due to store buffer blocks with unknown data.
+.It Li LD_BLOCKS.STORE_FORWARD
+.Pq Event 03H , Umask 02H
+loads blocked by overlapping with store buffer that cannot
+be forwarded.
+.It Li LD_BLOCKS.NO_SR
+.Pq Event 03H , Umask 08H
+# of Split loads blocked due to resource not available.
+.It Li LD_BLOCKS.ALL_BLOCK
+.Pq Event 03H , Umask 10H
+Number of cases where any load is blocked but has no
+DCU miss.
+.It Li MISALIGN_MEM_REF.LOADS
+.Pq Event 05H , Umask 01H
+Speculative cache-line split load uops dispatched to
+L1D.
+.It Li MISALIGN_MEM_REF.STORES
+.Pq Event 05H , Umask 02H
+Speculative cache-line split store-address uops
+dispatched to L1D.
+.It Li LD_BLOCKS_PARTIAL.ADDRESS_ALIAS
+.Pq Event 07H , Umask 01H
+False dependencies in MOB due to partial compare on
+address.
+.It Li LD_BLOCKS_PARTIAL.ALL_STALL_BLOCK
+.Pq Event 07H , Umask 08H
+The number of times that load operations are temporarily
+blocked because of older stores, with addresses that are
+not yet known.  A load operation may incur more than one
+block of this type.
+.It Li TLB_LOAD_MISSES.MISS_CAUSES_A_WALK
+.Pq Event 08H , Umask 01H
+Misses in all TLB levels that cause a page walk of any
+page size.
+.It Li TLB_LOAD_MISSES.WALK_COMPLETED
+.Pq Event 08H , Umask 02H
+Misses in all TLB levels that caused page walk completed
+of any size.
+.It Li DTLB_LOAD_MISSES.WALK_DURATION
+.Pq Event 08H , Umask 04H
+Cycle PMH is busy with a walk.
+.It Li DTLB_LOAD_MISSES.STLB_HIT
+.Pq Event 08H , Umask 10H
+Number of cache load STLB hits. No page walk.
+.It Li INT_MISC.RECOVERY_CYCLES
+.Pq Event 0DH , Umask 03H
+Cycles waiting to recover after Machine Clears or EClear.
+Set Cmask= 1.
+.It Li INT_MISC.RAT_STALL_CYCLES
+.Pq Event 0DH , Umask 40H
+Cycles RAT external stall is sent to IDQ for this thread.
+.It Li UOPS_ISSUED.ANY
+.Pq Event 0EH , Umask 01H
+Increments each cycle the # of Uops issued by the
+RAT to RS.
+Set Cmask = 1, Inv = 1, Any = 1 to count stalled cycles
+of this core.
+.It Li FP_COMP_OPS_EXE.X87
+.Pq Event 10H , Umask 01H
+Counts number of X87 uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_PACKED_DOUBLE
+.Pq Event 10H , Umask 10H
+Counts number of SSE* double precision FP packed
+uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_SCALAR_SINGLE
+.Pq Event 10H , Umask 20H
+Counts number of SSE* single precision FP scalar
+uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_PACKED_SINGLE
+.Pq Event 10H , Umask 40H
+Counts number of SSE* single precision FP packed
+uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE
+.Pq Event 10H , Umask 80H
+Counts number of SSE* double precision FP scalar
+uops executed.
+.It Li SIMD_FP_256.PACKED_SINGLE
+.Pq Event 11H , Umask 01H
+Counts 256-bit packed single-precision floating-
+point instructions.
+.It Li SIMD_FP_256.PACKED_DOUBLE
+.Pq Event 11H , Umask 02H
+Counts 256-bit packed double-precision floating-
+point instructions.
+.It Li ARITH.FPU_DIV_ACTIVE
+.Pq Event 14H , Umask 01H
+Cycles that the divider is active, includes INT and FP.
+Set 'edge =1, cmask=1' to count the number of
+divides.
+.It Li INSTS_WRITTEN_TO_IQ.INSTS
+.Pq Event 17H , Umask 01H
+Counts the number of instructions written into the
+IQ every cycle.
+.It Li L2_RQSTS.DEMAND_DATA_RD_HIT
+.Pq Event 24H , Umask 01H
+Demand Data Read requests that hit L2 cache.
+.It Li L2_RQSTS.ALL_DEMAND_DATA_RD
+.Pq Event 24H , Umask 03H
+Counts any demand and L1 HW prefetch data load
+requests to L2.
+.It Li L2_RQSTS.RFO_HITS
+.Pq Event 24H , Umask 04H
+Counts the number of store RFO requests that
+hit the L2 cache.
+.It Li L2_RQSTS.RFO_MISS
+.Pq Event 24H , Umask 08H
+Counts the number of store RFO requests that
+miss the L2 cache.
+.It Li L2_RQSTS.ALL_RFO
+.Pq Event 24H , Umask 0CH
+Counts all L2 store RFO requests.
+.It Li L2_RQSTS.CODE_RD_HIT
+.Pq Event 24H , Umask 10H
+Number of instruction fetches that hit the L2
+cache.
+.It Li L2_RQSTS.CODE_RD_MISS
+.Pq Event 24H , Umask 20H
+Number of instruction fetches that missed the L2
+cache.
+.It Li L2_RQSTS.ALL_CODE_RD
+.Pq Event 24H , Umask 30H
+Counts all L2 code requests.
+.It Li L2_RQSTS.PF_HIT
+.Pq Event 24H , Umask 40H
+Requests from L2 Hardware prefetcher that hit L2.
+.It Li L2_RQSTS.PF_MISS
+.Pq Event 24H , Umask 80H
+Requests from L2 Hardware prefetcher that missed
+L2.
+.It Li L2_RQSTS.ALL_PF
+.Pq Event 24H , Umask C0H
+Any requests from L2 Hardware prefetchers.
+.It Li L2_STORE_LOCK_RQSTS.MISS
+.Pq Event 27H , Umask 01H
+RFOs that miss cache lines.
+.It Li L2_STORE_LOCK_RQSTS.HIT_E
+.Pq Event 27H , Umask 04H
+RFOs that hit cache lines in E state.
+.It Li L2_STORE_LOCK_RQSTS.HIT_M
+.Pq Event 27H , Umask 08H
+RFOs that hit cache lines in M state.
+.It Li L2_STORE_LOCK_RQSTS.ALL
+.Pq Event 27H , Umask 0FH
+RFOs that access cache lines in any state.
+.It Li L2_L1D_WB_RQSTS.MISS
+.Pq Event 28H , Umask 01H
+Not rejected writebacks from L1D to L2 cache lines
+that missed L2.
+.It Li L2_L1D_WB_RQSTS.HIT_S
+.Pq Event 28H , Umask 02H
+Not rejected writebacks from L1D to L2 cache lines
+in S state.
+.It Li L2_L1D_WB_RQSTS.HIT_E
+.Pq Event 28H , Umask 04H
+Not rejected writebacks from L1D to L2 cache lines
+in E state.
+.It Li L2_L1D_WB_RQSTS.HIT_M
+.Pq Event 28H , Umask 08H
+Not rejected writebacks from L1D to L2 cache lines
+in M state.
+.It Li L2_L1D_WB_RQSTS.ALL
+.Pq Event 28H , Umask 0FH
+Not rejected writebacks from L1D to L2 cache.
+.It Li LONGEST_LAT_CACHE.REFERENCE
+.Pq Event 2EH , Umask 4FH
+This event counts requests originating from the
+core that reference
+a cache line in the last level cache.
+.It Li LONGEST_LAT_CACHE.MISS
+.Pq Event 2EH , Umask 41H
+This event counts each cache miss condition for
+references to the last level cache.
+.It Li CPU_CLK_UNHALTED.THREAD_P
+.Pq Event 3CH , Umask 00H
+Counts the number of thread cycles while the
+thread is not in a halt state. The thread enters
+the halt state when it is running the HLT
+instruction. The core frequency may change from
+time to time due to power or thermal throttling.
+.It Li CPU_CLK_THREAD_UNHALTED.REF_XCLK
+.Pq Event 3CH , Umask 01H
+Increments at the frequency of XCLK (100 MHz)
+when not halted.
+.It Li L1D_PEND_MISS.PENDING
+.Pq Event 48H , Umask 01H
+Increments the number of outstanding L1D misses
+every cycle.
+Set Cmask = 1 and Edge = 1 to count occurrences.
+.It Li DTLB_STORE_MISSES.MISS_CAUSES_A_WALK
+.Pq Event 49H , Umask 01H
+Miss in all TLB levels causes a page walk of
+any page size (4K/2M/4M/1G).
+.It Li DTLB_STORE_MISSES.WALK_COMPLETED
+.Pq Event 49H , Umask 02H
+Miss in all TLB levels causes a page walk that
+completes of any page size (4K/2M/4M/1G).
+.It Li DTLB_STORE_MISSES.WALK_DURATION
+.Pq Event 49H , Umask 04H
+Cycles PMH is busy with this walk.
+.It Li DTLB_STORE_MISSES.STLB_HIT
+.Pq Event 49H , Umask 10H
+Store operations that miss the first TLB level
+but hit the second and do not cause page walks.
+.It Li LOAD_HIT_PRE.SW_PF
+.Pq Event 4CH , Umask 01H
+Not SW-prefetch load dispatches that hit fill
+buffer allocated for S/W prefetch.
+.It Li LOAD_HIT_PRE.HW_PF
+.Pq Event 4CH , Umask 02H
+Not SW-prefetch load dispatches that hit fill
+buffer allocated for H/W prefetch.
+.It Li HW_PRE_REQ.DL1_MISS
+.Pq Event 4EH , Umask 02H
+Hardware Prefetch requests that miss the L1D
+cache. A request is being counted each time
+it accesses the cache and misses it, including if
+a block is applicable or if hit the Fill
+Buffer for example.
+.It Li L1D.REPLACEMENT
+.Pq Event 51H , Umask 01H
+Counts the number of lines brought into the
+L1 data cache.
+.It Li L1D.ALLOCATED_IN_M
+.Pq Event 51H , Umask 02H
+Counts the number of allocations of modified
+L1D cache lines.
+.It Li L1D.EVICTION
+.Pq Event 51H , Umask 04H
+Counts the number of modified lines evicted
+from the L1 data cache due to replacement.
+.It Li L1D.ALL_M_REPLACEMENT
+.Pq Event 51H , Umask 08H
+Cache lines in M state evicted out of L1D due
+to Snoop HitM or dirty line replacement.
+.It Li PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP
+.Pq Event 59H , Umask 0CH
+Increments the number of flags-merge uops in
+flight each cycle.
+Set Cmask = 1 to count cycles.
+.It Li PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW
+.Pq Event 59H , Umask 0FH
+Cycles with at least one slow LEA uop allocated.
+.It Li PARTIAL_RAT_STALLS.MUL_SINGLE_UOP 
+.Pq Event 59H , Umask 40H
+Number of Multiply packed/scalar single precision
+uops allocated.
+.It Li RESOURCE_STALLS2.ALL_FL_EMPTY 
+.Pq Event 5BH , Umask 0CH
+Cycles stalled due to free list empty.  
+.It Li RESOURCE_STALLS2.ALL_PRF_CONTROL 
+.Pq Event 5BH , Umask 0FH
+Cycles stalled due to control structures full for
+physical registers.
+.It Li RESOURCE_STALLS2.BOB_FULL 
+.Pq Event 5BH , Umask 40H
+Cycles Allocator is stalled due to Branch Order Buffer.
+.It Li RESOURCE_STALLS2.OOO_RSRC 
+.Pq Event 5BH , Umask 4FH
+Cycles stalled due to out of order resources full.  
+.It Li CPL_CYCLES.RING0 
+.Pq Event 5CH , Umask 01H
+Unhalted core cycles when the thread is in ring 0.  
+.It Li CPL_CYCLES.RING123 
+.Pq Event 5CH , Umask 02H
+Unhalted core cycles when the thread is not in ring
+0.
+.It Li RS_EVENTS.EMPTY_CYCLES
+.Pq Event 5EH , Umask 01H
+Cycles the RS is empty for the thread.  
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD 
+.Pq Event 60H , Umask 01H
+Offcore outstanding Demand Data Read
+transactions in SQ to uncore. Set Cmask=1 to count
+cycles.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO 
+.Pq Event 60H , Umask 04H
+Offcore outstanding RFO store transactions in SQ to
+uncore. Set Cmask=1 to count cycles.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD 
+.Pq Event 60H , Umask 08H
+Offcore outstanding cacheable data read
+transactions in SQ to uncore. Set Cmask=1 to count
+cycles.
+.It Li LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION 
+.Pq Event 63H , Umask 01H
+Cycles in which the L1D and L2 are locked, due to a
+UC lock or split lock.
+.It Li LOCK_CYCLES.CACHE_LOCK_DURATION 
+.Pq Event 63H , Umask 02H
+Cycles in which the L1D is locked.  
+.It Li IDQ.EMPTY 
+.Pq Event 79H , Umask 02H
+Counts cycles the IDQ is empty.
+.It Li IDQ.MITE_UOPS 
+.Pq Event 79H , Umask 04H
+Increment each cycle # of uops delivered to IDQ
+from MITE path.
+Set Cmask = 1 to count cycles.
+.It Li IDQ.DSB_UOPS 
+.Pq Event 79H , Umask 08H
+Increment each cycle # of uops delivered to IDQ
+from DSB path.
+Set Cmask = 1 to count cycles.
+.It Li IDQ.MS_DSB_UOPS 
+.Pq Event 79H , Umask 10H
+Increment each cycle # of uops delivered to IDQ
+when MS busy by DSB. Set Cmask = 1 to count
+cycles MS is busy. Set Cmask=1 and Edge =1 to
+count MS activations.
+.It Li IDQ.MS_MITE_UOPS 
+.Pq Event 79H , Umask 20H
+Increment each cycle # of uops delivered to IDQ
+when MS is busy by MITE. Set Cmask = 1 to count
+cycles.
+.It Li IDQ.MS_UOPS 
+.Pq Event 79H , Umask 30H
+Increment each cycle # of uops delivered to IDQ
+from MS by either DSB or MITE. Set Cmask = 1 to
+count cycles.
+.It Li ICACHE.MISSES 
+.Pq Event 80H , Umask 02H
+Number of Instruction Cache, Streaming Buffer and
+Victim Cache Misses. Includes UC accesses.
+.It Li ITLB_MISSES.MISS_CAUSES_A_WALK 
+.Pq Event 85H , Umask 01H
+Misses in all ITLB levels that cause page walks.  
+.It Li ITLB_MISSES.WALK_COMPLETED 
+.Pq Event 85H , Umask 02H
+Misses in all ITLB levels that cause completed page
+walks.
+.It Li ITLB_MISSES.WALK_DURATION 
+.Pq Event 85H , Umask 04H
+Cycle PMH is busy with a walk.  
+.It Li ITLB_MISSES.STLB_HIT 
+.Pq Event 85H , Umask 10H
+Number of cache load STLB hits. No page walk.  
+.It Li ILD_STALL.LCP 
+.Pq Event 87H , Umask 01H
+Stalls caused by changing prefix length of the
+instruction.
+.It Li ILD_STALL.IQ_FULL 
+.Pq Event 87H , Umask 04H
+Stall cycles due to IQ is full.  
+.It Li BR_INST_EXEC.COND
+.Pq Event 88H , Umask 01H
+Qualify conditional near branch instructions
+executed, but not necessarily retired.
+.It Li BR_INST_EXEC.DIRECT_JMP 
+.Pq Event 88H , Umask 02H
+Qualify all unconditional near branch instructions
+excluding calls and indirect branches.
+.It Li BR_INST_EXEC.INDIRECT_JMP_NON_CALL_RET 
+.Pq Event 88H , Umask 04H
+Qualify executed indirect near branch instructions
+that are not calls nor returns.
+.It Li BR_INST_EXEC.RETURN_NEAR 
+.Pq Event 88H , Umask 08H
+Qualify indirect near branches that have a return
+mnemonic.
+.It Li BR_INST_EXEC.DIRECT_NEAR_CALL 
+.Pq Event 88H , Umask 10H
+Qualify unconditional near call branch instructions,
+excluding non call branch, executed.
+.It Li BR_INST_EXEC.INDIRECT_NEAR_CALL 
+.Pq Event 88H , Umask 20H
+Qualify indirect near calls, including both register
+and memory indirect, executed.
+.It Li BR_INST_EXEC.NONTAKEN 
+.Pq Event 88H , Umask 40H
+Qualify non-taken near branches executed.  
+.It Li BR_INST_EXEC.TAKEN 
+.Pq Event 88H , Umask 80H
+Qualify taken near branches executed. Must
+combine with 01H,02H, 04H, 08H, 10H, 20H.
+.It Li BR_INST_EXEC.ALL_BRANCHES
+.Pq Event 88H , Umask FFH
+Counts all near executed branches (not necessarily
+retired).
+.It Li BR_MISP_EXEC.COND 
+.Pq Event 89H , Umask 01H
+Qualify conditional near branch instructions
+mispredicted.
+.It Li BR_MISP_EXEC.INDIRECT_JMP_NON_CALL_RET 
+.Pq Event 89H , Umask 04H
+Qualify mispredicted indirect near branch
+instructions that are not calls nor returns.
+.It Li BR_MISP_EXEC.RETURN_NEAR
+.Pq Event 89H , Umask 08H
+Qualify mispredicted indirect near branches that
+have a return mnemonic.
+.It Li BR_MISP_EXEC.DIRECT_NEAR_CALL 
+.Pq Event 89H , Umask 10H
+Qualify mispredicted unconditional near call branch
+instructions, excluding non call branch, executed.
+.It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL 
+.Pq Event 89H , Umask 20H
+Qualify mispredicted indirect near calls, including
+both register and memory indirect, executed.
+.It Li BR_MISP_EXEC.NONTAKEN 
+.Pq Event 89H , Umask 40H
+Qualify mispredicted non-taken near branches
+executed.
+.It Li BR_MISP_EXEC.TAKEN 
+.Pq Event 89H , Umask 80H
+Qualify mispredicted taken near branches executed.
+Must combine with 01H, 02H, 04H, 08H, 10H, 20H.
+.It Li BR_MISP_EXEC.ALL_BRANCHES 
+.Pq Event 89H , Umask FFH
+Counts all near executed branches (not necessarily
+retired).
+.It Li IDQ_UOPS_NOT_DELIVERED.CORE 
+.Pq Event 9CH , Umask 01H
+Count number of non-delivered uops to RAT per
+thread.
+.It Li UOPS_DISPATCHED_PORT.PORT_0 
+.Pq Event A1H , Umask 01H
+Cycles which a Uop is dispatched on port 0.  
+.It Li UOPS_DISPATCHED_PORT.PORT_1
+.Pq Event A1H , Umask 02H
+Cycles which a Uop is dispatched on port 1.  
+.It Li UOPS_DISPATCHED_PORT.PORT_2_LD 
+.Pq Event A1H , Umask 04H
+Cycles which a load uop is dispatched on port 2.  
+.It Li UOPS_DISPATCHED_PORT.PORT_2_STA 
+.Pq Event A1H , Umask 08H
+Cycles which a store address uop is dispatched on
+port 2.
+.It Li UOPS_DISPATCHED_PORT.PORT_2 
+.Pq Event A1H , Umask 0CH
+Cycles which a Uop is dispatched on port 2.  
+.It Li UOPS_DISPATCHED_PORT.PORT_3_LD 
+.Pq Event A1H , Umask 10H
+Cycles which a load uop is dispatched on port 3.  
+.It Li UOPS_DISPATCHED_PORT.PORT_3_STA 
+.Pq Event A1H , Umask 20H
+Cycles which a store address uop is dispatched on
+port 3.
+.It Li UOPS_DISPATCHED_PORT.PORT_3 
+.Pq Event A1H , Umask 30H
+Cycles which a Uop is dispatched on port 3.  
+.It Li UOPS_DISPATCHED_PORT.PORT_4
+.Pq Event A1H , Umask 40H
+Cycles which a Uop is dispatched on port 4.
+.It Li UOPS_DISPATCHED_PORT.PORT_5 
+.Pq Event A1H , Umask 80H
+Cycles which a Uop is dispatched on port 5.  
+.It Li RESOURCE_STALLS.ANY 
+.Pq Event A2H , Umask 01H
+Cycles Allocation is stalled due to Resource Related
+reason.
+.It Li RESOURCE_STALLS.LB 
+.Pq Event A2H , Umask 02H
+Counts the cycles of stall due to lack of load buffers.  
+.It Li RESOURCE_STALLS.RS 
+.Pq Event A2H , Umask 04H
+Cycles stalled due to no eligible RS entry available.  
+.It Li RESOURCE_STALLS.SB 
+.Pq Event A2H , Umask 08H
+Cycles stalled due to no store buffers available. (not
+including draining from sync).
+.It Li RESOURCE_STALLS.ROB 
+.Pq Event A2H , Umask 10H
+Cycles stalled due to re-order buffer full.  
+.It Li RESOURCE_STALLS.FCSW 
+.Pq Event A2H , Umask 20H
+Cycles stalled due to writing the FPU control word.  
+.It Li RESOURCE_STALLS.MXCSR 
+.Pq Event A2H , Umask 40H
+Cycles stalled due to the MXCSR register rename
+occurring too close to a previous MXCSR rename.
+.It Li RESOURCE_STALLS.OTHER 
+.Pq Event A2H , Umask 80H
+Cycles stalled while execution was stalled due to
+other resource issues.
+.It Li CYCLE_ACTIVITY.CYCLES_L2_PENDING 
+.Pq Event A3H , Umask 01H
+Cycles with pending L2 miss loads. Set AnyThread
+to count per core.
+.It Li CYCLE_ACTIVITY.CYCLES_L1D_PENDING 
+.Pq Event A3H , Umask 02H
+Cycles with pending L1 cache miss loads. Set
+AnyThread to count per core.
+.It Li CYCLE_ACTIVITY.CYCLES_NO_DISPATCH  
+.Pq Event A3H , Umask 04H
+Cycles of dispatch stalls. Set AnyThread to count per 
+core.
+.It Li DSB2MITE_SWITCHES.COUNT 
+.Pq Event ABH , Umask 01H
+Number of DSB to MITE switches.  
+.It Li DSB2MITE_SWITCHES.PENALTY_CYCLES 
+.Pq Event ABH , Umask 02H
+Cycles DSB to MITE switches caused delay.  
+.It Li DSB_FILL.OTHER_CANCEL 
+.Pq Event ACH , Umask 02H
+Cases of cancelling valid DSB fill not because of
+exceeding way limit.
+.It Li DSB_FILL.EXCEED_DSB_LINES 
+.Pq Event ACH , Umask 08H
+DSB Fill encountered > 3 DSB lines.  
+.It Li DSB_FILL.ALL_CANCEL 
+.Pq Event ACH , Umask 0AH
+Cases of cancelling valid Decode Stream Buffer
+(DSB) fill not because of exceeding way limit.
+.It Li ITLB.ITLB_FLUSH 
+.Pq Event AEH , Umask 01H
+Counts the number of ITLB flushes, includes
+4k/2M/4M pages.
+.It Li OFFCORE_REQUESTS.DEMAND_DATA_RD 
+.Pq Event B0H , Umask 01H
+Demand data read requests sent to uncore.
+.It Li OFFCORE_REQUESTS.DEMAND_RFO 
+.Pq Event B0H , Umask 04H
+Demand RFO read requests sent to uncore, including
+regular RFOs, locks, ItoM.
+.It Li OFFCORE_REQUESTS.ALL_DATA_RD 
+.Pq Event B0H , Umask 08H
+Data read requests sent to uncore (demand and
+prefetch).
+.It Li UOPS_DISPATCHED.THREAD 
+.Pq Event B1H , Umask 01H
+Counts total number of uops to be dispatched per-
+thread each cycle. Set Cmask = 1, INV =1 to count
+stall cycles.
+.It Li UOPS_DISPATCHED.CORE 
+.Pq Event B1H , Umask 02H
+Counts total number of uops to be dispatched per-
+core each cycle.
+.It Li OFFCORE_REQUESTS_BUFFER.SQ_FULL 
+.Pq Event B2H , Umask 01H
+Offcore requests buffer cannot take more entries
+for this thread core.
+.It Li AGU_BYPASS_CANCEL.COUNT
+.Pq Event B6H , Umask 01H
+Counts executed load operations with all the
+following traits: 1. addressing of the format [base +
+offset], 2. the offset is between 1 and 2047, 3. the
+address specified in the base register is in one page
+and the address [base+offset] is in another page.
+.It Li OFF_CORE_RESPONSE_0 
+.Pq Event B7H , Umask 01H
+Off-core Response Performance Monitoring; PMC0 only.
+Requires programming MSR 01A6H.
+.It Li OFF_CORE_RESPONSE_1 
+.Pq Event BBH , Umask 01H
+Off-core Response Performance Monitoring; PMC3 only.
+Requires programming MSR 01A7H.
+.It Li TLB_FLUSH.DTLB_THREAD 
+.Pq Event BDH , Umask 01H
+DTLB flush attempts of the thread-specific entries.  
+.It Li TLB_FLUSH.STLB_ANY 
+.Pq Event BDH , Umask 20H
+Count number of STLB flush attempts.  
+.It Li L1D_BLOCKS.BANK_CONFLICT_CYCLES 
+.Pq Event BFH , Umask 05H
+Cycles when dispatched loads are cancelled due to
+L1D bank conflicts with other load ports.
+.It Li INST_RETIRED.ANY_P 
+.Pq Event C0H , Umask 00H
+Number of instructions at retirement.  
+.It Li INST_RETIRED.ALL 
+.Pq Event C0H , Umask 01H
+Precise instruction retired event with HW to reduce
+effect of PEBS shadow in IP distribution.
+.It Li OTHER_ASSISTS.ITLB_MISS_RETIRED
+.Pq Event C1H , Umask 02H
+Instructions that experienced an ITLB miss.  
+.It Li OTHER_ASSISTS.AVX_STORE 
+.Pq Event C1H , Umask 08H
+Number of assists associated with 256-bit AVX
+store operations.
+.It Li OTHER_ASSISTS.AVX_TO_SSE 
+.Pq Event C1H , Umask 10H
+Number of transitions from AVX-256 to legacy SSE
+when penalty applicable.
+.It Li OTHER_ASSISTS.SSE_TO_AVX 
+.Pq Event C1H , Umask 20H
+Number of transitions from SSE to AVX-256 when
+penalty applicable.
+.It Li UOPS_RETIRED.ALL 
+.Pq Event C2H , Umask 01H
+Counts the number of micro-ops retired. Use
+cmask=1 and invert to count active cycles or stalled
+cycles.
+.It Li UOPS_RETIRED.RETIRE_SLOTS
+.Pq Event C2H , Umask 02H
+Counts the number of retirement slots used each
+cycle.
+.It Li MACHINE_CLEARS.MEMORY_ORDERING 
+.Pq Event C3H , Umask 02H
+Counts the number of machine clears due to
+memory order conflicts.
+.It Li MACHINE_CLEARS.SMC
+.Pq Event C3H , Umask 04H
+Counts the number of times that a program writes
+to a code section.
+.It Li MACHINE_CLEARS.MASKMOV 
+.Pq Event C3H , Umask 20H
+Counts the number of executed AVX masked load
+operations that refer to an illegal address range
+with the mask bits set to 0.
+.It Li BR_INST_RETIRED.ALL_BRANCH 
+.Pq Event C4H , Umask 00H
+Branch instructions at retirement.  
+.It Li BR_INST_RETIRED.CONDITIONAL 
+.Pq Event C4H , Umask 01H
+Counts the number of conditional branch
+instructions retired.
+.It Li BR_INST_RETIRED.NEAR_CALL 
+.Pq Event C4H , Umask 02H
+Direct and indirect near call instructions retired.  
+.It Li BR_INST_RETIRED.ALL_BRANCHES 
+.Pq Event C4H , Umask 04H
+Counts the number of branch instructions retired.  
+.It Li BR_INST_RETIRED.NEAR_RETURN 
+.Pq Event C4H , Umask 08H
+Counts the number of near return instructions
+retired.
+.It Li BR_INST_RETIRED.NOT_TAKEN
+.Pq Event C4H , Umask 10H
+Counts the number of not taken branch instructions
+retired.
+.It Li BR_INST_RETIRED.NEAR_TAKEN
+.Pq Event C4H , Umask 20H
+Number of near taken branches retired.  
+.It Li BR_INST_RETIRED.FAR_BRANCH
+.Pq Event C4H , Umask 40H
+Number of far branches retired.  
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 00H
+Mispredicted branch instructions at retirement.  
+.It Li BR_MISP_RETIRED.CONDITIONAL
+.Pq Event C5H , Umask 01H
+Mispredicted conditional branch instructions retired.   
+.It Li BR_MISP_RETIRED.NEAR_CALL
+.Pq Event C5H , Umask 02H
+Direct and indirect mispredicted near call
+instructions retired.
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 04H
+Mispredicted macro branch instructions retired.
+.It Li BR_MISP_RETIRED.NOT_TAKEN
+.Pq Event C5H , Umask 10H
+Mispredicted not taken branch instructions retired.  
+.It Li BR_MISP_RETIRED.TAKEN
+.Pq Event C5H , Umask 20H
+Mispredicted taken branch instructions retired.  
+.It Li FP_ASSIST.X87_OUTPUT
+.Pq Event CAH , Umask 02H
+Number of X87 assists due to output value.  
+.It Li FP_ASSIST.X87_INPUT
+.Pq Event CAH , Umask 04H
+Number of X87 assists due to input value.  
+.It Li FP_ASSIST.SIMD_OUTPUT
+.Pq Event CAH , Umask 08H
+Number of SIMD FP assists due to output values.
+.It Li FP_ASSIST.SIMD_INPUT
+.Pq Event CAH , Umask 10H
+Number of SIMD FP assists due to input values.  
+.It Li FP_ASSIST.ANY
+.Pq Event CAH , Umask 1EH
+Cycles with any input/output SSE* or FP assists.  
+.It Li ROB_MISC_EVENTS.LBR_INSERTS
+.Pq Event CCH , Umask 20H
+Count cases of saving new LBR records by
+hardware.
+.It Li MEM_TRANS_RETIRED.LOAD_LATENCY
+.Pq Event CDH , Umask 01H
+Sample loads with specified latency threshold.
+PMC3 only.
+.It Li MEM_TRANS_RETIRED.PRECISE_STORE
+.Pq Event CDH , Umask 02H
+Sample stores and collect precise store operation
+via PEBS record. PMC3 only.
+.It Li MEM_UOP_RETIRED.LOADS
+.Pq Event D0H , Umask 01H
+Qualify retired memory uops that are loads.
+Combine with umask 10H, 20H, 40H, 80H.
+.It Li MEM_UOP_RETIRED.STORES
+.Pq Event D0H , Umask 02H
+Qualify retired memory uops that are stores.
+Combine with umask 10H, 20H, 40H, 80H.
+.It Li MEM_UOP_RETIRED.STLB_MISS
+.Pq Event D0H , Umask 10H
+Qualify retired memory uops with STLB miss. Must
+combine with umask 01H, 02H, to produce counts.
+.It Li MEM_UOP_RETIRED.LOCK
+.Pq Event D0H , Umask 20H
+Qualify retired memory uops with lock. Must
+combine with umask 01H, 02H, to produce counts.
+.It Li MEM_UOP_RETIRED.SPLIT
+.Pq Event D0H , Umask 40H
+Qualify retired memory uops with line split. Must

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201210191701.q9JH1S5G037802>