Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 23 Mar 2026 20:22:15 +0000
Message-ID:  <69c1a0f7.47fb8.263fa653@gitrepo.freebsd.org>

index | next in thread | raw e-mail

The branch main has been updated by mhorne:

URL: https://cgit.FreeBSD.org/src/commit/?id=df47355fae720fd8f63f36a50c8933f8342483d2

commit df47355fae720fd8f63f36a50c8933f8342483d2
Author:     Ali Mashtizadeh <mashti@uwaterloo.ca>
AuthorDate: 2026-03-18 04:27:09 +0000
Commit:     Mitchell Horne <mhorne@FreeBSD.org>
CommitDate: 2026-03-23 20:21:28 +0000

    libpmc: Add support for IBS qualifiers
    
    Add support to libpmc for parsing the IBS qualifiers and computing the
    ctl register value as a function of the qualifiers and the sample rate.
    This includes all of the flags available up to AMD Zen 5.  Alongside
    these user-facing changes, I included the documentation for AMD IBS.
    
    Reviewed by:    mhorne
    Sponsored by:   Netflix
    Pull Request:   https://github.com/freebsd/freebsd-src/pull/2081
---
 lib/libpmc/Makefile       |   1 +
 lib/libpmc/libpmc.c       |  71 ++++++++++++++++++----
 lib/libpmc/pmc.3          |   7 +++
 lib/libpmc/pmc.amd.3      |   1 +
 lib/libpmc/pmc.core.3     |   1 +
 lib/libpmc/pmc.core2.3    |   1 +
 lib/libpmc/pmc.iaf.3      |   1 +
 lib/libpmc/pmc.ibs.3      | 150 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/libpmc/pmc.soft.3     |   1 +
 lib/libpmc/pmc.tsc.3      |   1 +
 lib/libpmc/pmc.ucf.3      |   1 +
 sys/dev/hwpmc/hwpmc_ibs.h |  19 +++++-
 12 files changed, 244 insertions(+), 11 deletions(-)

diff --git a/lib/libpmc/Makefile b/lib/libpmc/Makefile
index 590f719ebff4..442efdc3d9c0 100644
--- a/lib/libpmc/Makefile
+++ b/lib/libpmc/Makefile
@@ -74,6 +74,7 @@ MAN+=	pmc.haswell.3
 MAN+=	pmc.haswelluc.3
 MAN+=	pmc.haswellxeon.3
 MAN+=	pmc.iaf.3
+MAN+=	pmc.ibs.3
 MAN+=	pmc.ivybridge.3
 MAN+=	pmc.ivybridgexeon.3
 MAN+=	pmc.sandybridge.3
diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c
index ceba40aa7b39..ebb642e8d16b 100644
--- a/lib/libpmc/libpmc.c
+++ b/lib/libpmc/libpmc.c
@@ -696,7 +696,7 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
     struct pmc_op_pmcallocate *pmc_config)
 {
 	char *e, *p, *q;
-	uint64_t ctl;
+	uint64_t ctl, ldlat;
 
 	pmc_config->pm_caps |=
 	    (PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_PRECISE);
@@ -714,23 +714,74 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
 		return (-1);
 	}
 
+	/* IBS only supports sampling mode */
+	if (!PMC_IS_SAMPLING_MODE(pmc_config->pm_mode)) {
+		return (-1);
+	}
+
 	/* parse parameters */
-	while ((p = strsep(&ctrspec, ",")) != NULL) {
-		if (KWPREFIXMATCH(p, "ctl=")) {
-			q = strchr(p, '=');
-			if (*++q == '\0') /* skip '=' */
+	ctl = 0;
+	if (pe == PMC_EV_IBS_FETCH) {
+		while ((p = strsep(&ctrspec, ",")) != NULL) {
+			if (KWMATCH(p, "l3miss")) {
+				ctl |= IBS_FETCH_CTL_L3MISSONLY;
+			} else if (KWMATCH(p, "randomize")) {
+				ctl |= IBS_FETCH_CTL_RANDOMIZE;
+			} else {
 				return (-1);
+			}
+		}
 
-			ctl = strtoull(q, &e, 0);
-			if (e == q || *e != '\0')
+		if (pmc_config->pm_count < IBS_FETCH_MIN_RATE ||
+		    pmc_config->pm_count > IBS_FETCH_MAX_RATE)
+			return (-1);
+
+		ctl |= IBS_FETCH_INTERVAL_TO_CTL(pmc_config->pm_count);
+	} else {
+		while ((p = strsep(&ctrspec, ",")) != NULL) {
+			if (KWMATCH(p, "l3miss")) {
+				ctl |= IBS_OP_CTL_L3MISSONLY;
+			} else if (KWPREFIXMATCH(p, "ldlat=")) {
+				q = strchr(p, '=');
+				if (*++q == '\0') /* skip '=' */
+					return (-1);
+
+				ldlat = strtoull(q, &e, 0);
+				if (e == q || *e != '\0')
+					return (-1);
+
+				/*
+				 * IBS load latency filtering requires the
+				 * latency to be a multiple of 128 and between
+				 * 128 and 2048.  The latency is stored in the
+				 * IbsOpLatThrsh field, which only contains
+				 * four bits so the processor computes
+				 * (IbsOpLatThrsh+1)*128 as the value.
+				 *
+				 * AMD PPR Vol 1 for AMD Family 1Ah Model 02h
+				 * C1 (57238) 2026-03-06 Revision 0.49.
+				 */
+				if (ldlat < 128 || ldlat > 2048)
+					return (-1);
+				ctl |= IBS_OP_CTL_LDLAT_TO_CTL(ldlat);
+				ctl |= IBS_OP_CTL_L3MISSONLY | IBS_OP_CTL_LATFLTEN;
+			} else if (KWMATCH(p, "randomize")) {
+				ctl |= IBS_OP_CTL_COUNTERCONTROL;
+			} else {
 				return (-1);
+			}
+		}
 
-			pmc_config->pm_md.pm_ibs.ibs_ctl |= ctl;
-		} else {
+		if (pmc_config->pm_count < IBS_OP_MIN_RATE ||
+		    pmc_config->pm_count > IBS_OP_MAX_RATE)
 			return (-1);
-		}
+
+		ctl |= IBS_OP_INTERVAL_TO_CTL(pmc_config->pm_count);
 	}
 
+
+	pmc_config->pm_md.pm_ibs.ibs_ctl |= ctl;
+
 	return (0);
 }
 
diff --git a/lib/libpmc/pmc.3 b/lib/libpmc/pmc.3
index 9a5b599759ff..cb28e0b786b9 100644
--- a/lib/libpmc/pmc.3
+++ b/lib/libpmc/pmc.3
@@ -224,6 +224,11 @@ performance measurement architecture version 2 and later.
 Programmable hardware counters present in CPUs conforming to the
 .Tn Intel
 performance measurement architecture version 1 and later.
+.It Li PMC_CLASS_IBS
+.Tn AMD
+Instruction Based Sampling (IBS) counters present in
+.Tn AMD
+Family 10h and above.
 .It Li PMC_CLASS_K8
 Programmable hardware counters present in
 .Tn "AMD Athlon64"
@@ -491,6 +496,7 @@ following manual pages:
 .It Em "PMC Class"      Ta Em "Manual Page"
 .It Li PMC_CLASS_IAF    Ta Xr pmc.iaf 3
 .It Li PMC_CLASS_IAP    Ta Xr pmc.atom 3 , Xr pmc.core 3 , Xr pmc.core2 3
+.It Li PMC_CLASS_IBS    Ta Xr pmc.ibs 3
 .It Li PMC_CLASS_K8     Ta Xr pmc.amd 3
 .It Li PMC_CLASS_TSC    Ta Xr pmc.tsc 3
 .El
@@ -542,6 +548,7 @@ Doing otherwise is unsupported.
 .Xr pmc.haswelluc 3 ,
 .Xr pmc.haswellxeon 3 ,
 .Xr pmc.iaf 3 ,
+.Xr pmc.ibs 3 ,
 .Xr pmc.ivybridge 3 ,
 .Xr pmc.ivybridgexeon 3 ,
 .Xr pmc.sandybridge 3 ,
diff --git a/lib/libpmc/pmc.amd.3 b/lib/libpmc/pmc.amd.3
index 047b31aa78bb..75c6331b000f 100644
--- a/lib/libpmc/pmc.amd.3
+++ b/lib/libpmc/pmc.amd.3
@@ -777,6 +777,7 @@ and the underlying hardware events used.
 .Xr pmc.core 3 ,
 .Xr pmc.core2 3 ,
 .Xr pmc.iaf 3 ,
+.Xr pmc.ibs 3 ,
 .Xr pmc.soft 3 ,
 .Xr pmc.tsc 3 ,
 .Xr pmclog 3 ,
diff --git a/lib/libpmc/pmc.core.3 b/lib/libpmc/pmc.core.3
index b4fa9ab661a4..4c41e7c7ad3b 100644
--- a/lib/libpmc/pmc.core.3
+++ b/lib/libpmc/pmc.core.3
@@ -786,6 +786,7 @@ may not count some transitions.
 .Xr pmc.atom 3 ,
 .Xr pmc.core2 3 ,
 .Xr pmc.iaf 3 ,
+.Xr pmc.ibs 3 ,
 .Xr pmc.soft 3 ,
 .Xr pmc.tsc 3 ,
 .Xr pmclog 3 ,
diff --git a/lib/libpmc/pmc.core2.3 b/lib/libpmc/pmc.core2.3
index 86604b7ff16c..7e544fad43b6 100644
--- a/lib/libpmc/pmc.core2.3
+++ b/lib/libpmc/pmc.core2.3
@@ -1101,6 +1101,7 @@ and the underlying hardware events used.
 .Xr pmc.atom 3 ,
 .Xr pmc.core 3 ,
 .Xr pmc.iaf 3 ,
+.Xr pmc.ibs 3 ,
 .Xr pmc.soft 3 ,
 .Xr pmc.tsc 3 ,
 .Xr pmc_cpuinfo 3 ,
diff --git a/lib/libpmc/pmc.iaf.3 b/lib/libpmc/pmc.iaf.3
index eaf45db140f5..c3528e472103 100644
--- a/lib/libpmc/pmc.iaf.3
+++ b/lib/libpmc/pmc.iaf.3
@@ -125,6 +125,7 @@ CPU, use the event specifier
 .Xr pmc.atom 3 ,
 .Xr pmc.core 3 ,
 .Xr pmc.core2 3 ,
+.Xr pmc.ibs 3 ,
 .Xr pmc.soft 3 ,
 .Xr pmc.tsc 3 ,
 .Xr pmc_cpuinfo 3 ,
diff --git a/lib/libpmc/pmc.ibs.3 b/lib/libpmc/pmc.ibs.3
new file mode 100644
index 000000000000..69b90b84556c
--- /dev/null
+++ b/lib/libpmc/pmc.ibs.3
@@ -0,0 +1,150 @@
+.\" Copyright (c) 2016 Ali Mashtizadeh.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.Dd March 15, 2026
+.Dt PMC.IBS 3
+.Os
+.Sh NAME
+.Nm pmc.ibs
+.Nd Instruction Based Sampling for
+.Tn AMD
+CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+AMD Instruction Based Sampling (IBS) was introduced with the K10 family of
+CPUs.
+AMD IBS is an alternative approach that samples instructions or micro-ops and
+provides a per-instruction or micro-op breakdown of the sources of stalls.
+.Pp
+Unlike traditional counters, IBS can only be used in the sampling mode and
+provides extra data embedded in the callchain.
+IBS events set the PMC_F_MULTIPART flag to signify multiple payload types are
+contained in the callchain.
+The first 8 bytes of the callchain contain four tuples with a one byte type and
+a one byte length field.
+The regular PMC callchain can be found following the multipart payload.
+.Pp
+IBS only provides two events that analyze instruction fetches and instruction
+execution.
+The instruction fetch (ibs-fetch) event provides data on the processor
+front-end including reporting instruction cache and TLB events.
+The instruction execution (ibs-op) event provides data on the processor
+execution including reporting mispredictions, data cache and TLB events.
+You should use the AMD PMC counters documented in
+.Xr pmc.amd 3
+to analyze stalls relating to instruction issue, including reservation contention.
+.Pp
+A guide to analyzing IBS data is provided in Appendix G of the
+.Rs
+.%B "Software Optimization Guide for AMD Family 10h and 12h Processors"
+.%N "Publication No. 40546"
+.%D "February 2011"
+.%Q "Advanced Micro Devices, Inc."
+.Re
+A more recent document should be used for decoding all of the flags and fields
+in the IBS data.
+For example, see the AMD Zen 5 documentation
+.Rs
+.%B "Processor Programming Reference (PPR) for AMD Family 1Ah Model 02h"
+.%N "Publication No. 57238"
+.%D "March 6, 2026"
+.%Q "Advanced Micro Devices, Inc."
+.Re
+.Ss PMC Features
+AMD IBS supports the following capabilities.
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta \&No
+.It PMC_CAP_READ Ta \&No
+.It PMC_CAP_PRECISE Ta Yes
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta \&No
+.It PMC_CAP_USER Ta \&No
+.It PMC_CAP_WRITE Ta \&No
+.El
+.Pp
+By default AMD IBS enables the edge, interrupt, system and precise flags.
+.Ss Event Qualifiers
+Event specifiers for AMD IBS can have the following optional
+qualifiers:
+.Bl -tag -width "ldlat=value"
+.It Li l3miss
+Configure IBS to only sample if an L3 cache miss occurred.
+.It Li ldlat= Ns Ar value
+Configure the counter to only sample events with load latencies above
+.Ar value .
+IBS only supports filtering latencies that are a multiple of 128 and between
+128 and 2048.
+Load latency filtering can only be used with ibs-op events and implies the
+l3miss qualifier.
+.It Li randomize
+Randomize the sampling rate.
+.El
+.Ss AMD IBS Events Specifiers
+The IBS event class provides only two event specifiers:
+.Bl -tag -width indent
+.It Li ibs-fetch Xo
+.Op ,l3miss
+.Op ,randomize
+.Xc
+Collect performance samples during instruction fetch.
+The
+.Ar randomize
+qualifier randomly sets the bottom four bits of the sample rate.
+.It Li ibs-op Xo
+.Op ,l3miss
+.Op ,ldlat= Ns Ar ldlat
+.Op ,randomize
+.Xc
+Collect performance samples during instruction execution.
+The
+.Ar randomize
+qualifier, upon reaching the maximum count, restarts the count with a value
+between 1 and 127.
+.El
+.Pp
+You may collect both events at the same time.
+N.B. AMD discouraged doing so with certain older processors, stating that
+sampling both simultaneously perturbs the results.
+Please see the processor programming reference for your specific processor.
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.amd 3 ,
+.Xr pmc.soft 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+AMD IBS support was first introduced in
+.Fx 16.0 .
+.Sh AUTHORS
+AMD IBS support and this manual page were written by
+.An Ali Mashtizadeh Aq Mt ali@mashtizadeh.com
+and sponsored by Netflix, Inc.
diff --git a/lib/libpmc/pmc.soft.3 b/lib/libpmc/pmc.soft.3
index 08d5af63d02d..f58b3e8ffa26 100644
--- a/lib/libpmc/pmc.soft.3
+++ b/lib/libpmc/pmc.soft.3
@@ -90,6 +90,7 @@ Write page fault.
 .Xr pmc.corei7 3 ,
 .Xr pmc.corei7uc 3 ,
 .Xr pmc.iaf 3 ,
+.Xr pmc.ibs 3 ,
 .Xr pmc.tsc 3 ,
 .Xr pmc.ucf 3 ,
 .Xr pmc.westmereuc 3 ,
diff --git a/lib/libpmc/pmc.tsc.3 b/lib/libpmc/pmc.tsc.3
index 4834d897f90c..73e2377df0c7 100644
--- a/lib/libpmc/pmc.tsc.3
+++ b/lib/libpmc/pmc.tsc.3
@@ -62,6 +62,7 @@ maps to the TSC.
 .Xr pmc.core 3 ,
 .Xr pmc.core2 3 ,
 .Xr pmc.iaf 3 ,
+.Xr pmc.ibs 3 ,
 .Xr pmc.soft 3 ,
 .Xr pmclog 3 ,
 .Xr hwpmc 4
diff --git a/lib/libpmc/pmc.ucf.3 b/lib/libpmc/pmc.ucf.3
index a7cea6bb57f9..37ee0f87a951 100644
--- a/lib/libpmc/pmc.ucf.3
+++ b/lib/libpmc/pmc.ucf.3
@@ -88,6 +88,7 @@ offset C0H under device number 0 and Function 0.
 .Xr pmc.corei7 3 ,
 .Xr pmc.corei7uc 3 ,
 .Xr pmc.iaf 3 ,
+.Xr pmc.ibs 3 ,
 .Xr pmc.soft 3 ,
 .Xr pmc.tsc 3 ,
 .Xr pmc.westmere 3 ,
diff --git a/sys/dev/hwpmc/hwpmc_ibs.h b/sys/dev/hwpmc/hwpmc_ibs.h
index 4449b44c8368..01fc88648558 100644
--- a/sys/dev/hwpmc/hwpmc_ibs.h
+++ b/sys/dev/hwpmc/hwpmc_ibs.h
@@ -67,6 +67,18 @@
 #define IBS_CTL_LVTOFFSETVALID		(1ULL << 8)
 #define IBS_CTL_LVTOFFSETMASK		0x0000000F
 
+/*
+ * The minimum sampling rate was selected to match the default used by other
+ * counters; it was also found experimentally to be stable, as it leaves enough
+ * time between consecutive NMIs.  The maximum sample rate is determined by
+ * setting all available counter bits, i.e., all bits of the count except the
+ * bottom four, which are implicitly zero.
+ */
+#define IBS_FETCH_MIN_RATE		65536
+#define IBS_FETCH_MAX_RATE		1048560
+#define IBS_OP_MIN_RATE			65536
+#define IBS_OP_MAX_RATE			134217712
+
 /* IBS Fetch Control */
 #define IBS_FETCH_CTL			0xC0011030 /* IBS Fetch Control */
 #define IBS_FETCH_CTL_L3MISS		(1ULL << 61) /* L3 Cache Miss */
@@ -82,7 +94,8 @@
 #define IBS_FETCH_CTL_ENABLE		(1ULL << 48) /* Enable */
 #define IBS_FETCH_CTL_MAXCNTMASK	0x0000FFFFULL
 
-#define IBS_FETCH_CTL_TO_LAT(_c)	((_c >> 32) & 0x0000FFFF)
+#define IBS_FETCH_INTERVAL_TO_CTL(_c)	(((_c) >> 4) & 0x0000FFFF)
+#define IBS_FETCH_CTL_TO_LAT(_c)	(((_c) >> 32) & 0x0000FFFF)
 
 #define IBS_FETCH_LINADDR		0xC0011031 /* Fetch Linear Address */
 #define IBS_FETCH_PHYSADDR		0xC0011032 /* Fetch Physical Address */
@@ -95,12 +108,16 @@
 
 /* IBS Execution Control */
 #define IBS_OP_CTL			0xC0011033 /* IBS Execution Control */
+#define IBS_OP_CTL_LATFLTEN		(1ULL << 63) /* Load Latency Filtering */
 #define IBS_OP_CTL_COUNTERCONTROL	(1ULL << 19) /* Counter Control */
 #define IBS_OP_CTL_VALID		(1ULL << 18) /* Valid */
 #define IBS_OP_CTL_ENABLE		(1ULL << 17) /* Enable */
 #define IBS_OP_CTL_L3MISSONLY		(1ULL << 16) /* L3 Miss Filtering */
 #define IBS_OP_CTL_MAXCNTMASK		0x0000FFFFULL
 
+#define IBS_OP_CTL_LDLAT_TO_CTL(_c)	((((ldlat) >> 7) - 1) << 59)
+#define IBS_OP_INTERVAL_TO_CTL(_c)	((((_c) >> 4) & 0x0000FFFFULL) | ((_c) & 0x07F00000))
+
 #define IBS_OP_RIP			0xC0011034 /* IBS Op RIP */
 #define IBS_OP_DATA			0xC0011035 /* IBS Op Data */
 #define IBS_OP_DATA_RIPINVALID		(1ULL << 38) /* RIP Invalid */


home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69c1a0f7.47fb8.263fa653>