Date: Mon, 23 Mar 2026 20:22:15 +0000 Message-ID: <69c1a0f7.47fb8.263fa653@gitrepo.freebsd.org>
index | next in thread | raw e-mail
The branch main has been updated by mhorne: URL: https://cgit.FreeBSD.org/src/commit/?id=df47355fae720fd8f63f36a50c8933f8342483d2 commit df47355fae720fd8f63f36a50c8933f8342483d2 Author: Ali Mashtizadeh <mashti@uwaterloo.ca> AuthorDate: 2026-03-18 04:27:09 +0000 Commit: Mitchell Horne <mhorne@FreeBSD.org> CommitDate: 2026-03-23 20:21:28 +0000 libpmc: Add support for IBS qualifiers Add support to libpmc for parsing the IBS qualifiers and computing the ctl register value as a function of the qualifiers and the sample rate. This includes all of the flags available up to AMD Zen 5. Along side these user facing changes I included the documentation for AMD IBS. Reviewed by: mhorne Sponsored by: Netflix Pull Request: https://github.com/freebsd/freebsd-src/pull/2081 --- lib/libpmc/Makefile | 1 + lib/libpmc/libpmc.c | 71 ++++++++++++++++++---- lib/libpmc/pmc.3 | 7 +++ lib/libpmc/pmc.amd.3 | 1 + lib/libpmc/pmc.core.3 | 1 + lib/libpmc/pmc.core2.3 | 1 + lib/libpmc/pmc.iaf.3 | 1 + lib/libpmc/pmc.ibs.3 | 150 ++++++++++++++++++++++++++++++++++++++++++++++ lib/libpmc/pmc.soft.3 | 1 + lib/libpmc/pmc.tsc.3 | 1 + lib/libpmc/pmc.ucf.3 | 1 + sys/dev/hwpmc/hwpmc_ibs.h | 19 +++++- 12 files changed, 244 insertions(+), 11 deletions(-) diff --git a/lib/libpmc/Makefile b/lib/libpmc/Makefile index 590f719ebff4..442efdc3d9c0 100644 --- a/lib/libpmc/Makefile +++ b/lib/libpmc/Makefile @@ -74,6 +74,7 @@ MAN+= pmc.haswell.3 MAN+= pmc.haswelluc.3 MAN+= pmc.haswellxeon.3 MAN+= pmc.iaf.3 +MAN+= pmc.ibs.3 MAN+= pmc.ivybridge.3 MAN+= pmc.ivybridgexeon.3 MAN+= pmc.sandybridge.3 diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c index ceba40aa7b39..ebb642e8d16b 100644 --- a/lib/libpmc/libpmc.c +++ b/lib/libpmc/libpmc.c @@ -696,7 +696,7 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) { char *e, *p, *q; - uint64_t ctl; + uint64_t ctl, ldlat; pmc_config->pm_caps |= (PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_PRECISE); @@ -714,23 +714,74 @@ ibs_allocate_pmc(enum 
pmc_event pe, char *ctrspec, return (-1); } + /* IBS only supports sampling mode */ + if (!PMC_IS_SAMPLING_MODE(pmc_config->pm_mode)) { + return (-1); + } + /* parse parameters */ - while ((p = strsep(&ctrspec, ",")) != NULL) { - if (KWPREFIXMATCH(p, "ctl=")) { - q = strchr(p, '='); - if (*++q == '\0') /* skip '=' */ + ctl = 0; + if (pe == PMC_EV_IBS_FETCH) { + while ((p = strsep(&ctrspec, ",")) != NULL) { + if (KWMATCH(p, "l3miss")) { + ctl |= IBS_FETCH_CTL_L3MISSONLY; + } else if (KWMATCH(p, "randomize")) { + ctl |= IBS_FETCH_CTL_RANDOMIZE; + } else { return (-1); + } + } - ctl = strtoull(q, &e, 0); - if (e == q || *e != '\0') + if (pmc_config->pm_count < IBS_FETCH_MIN_RATE || + pmc_config->pm_count > IBS_FETCH_MAX_RATE) + return (-1); + + ctl |= IBS_FETCH_INTERVAL_TO_CTL(pmc_config->pm_count); + } else { + while ((p = strsep(&ctrspec, ",")) != NULL) { + if (KWMATCH(p, "l3miss")) { + ctl |= IBS_OP_CTL_L3MISSONLY; + } else if (KWPREFIXMATCH(p, "ldlat=")) { + q = strchr(p, '='); + if (*++q == '\0') /* skip '=' */ + return (-1); + + ldlat = strtoull(q, &e, 0); + if (e == q || *e != '\0') + return (-1); + + /* + * IBS load latency filtering requires the + * latency to be a multiple of 128 and between + * 128 and 2048. The latency is stored in the + * IbsOpLatThrsh field, which only contains + * four bits so the processor computes + * (IbsOpLatThrsh+1)*128 as the value. + * + * AMD PPR Vol 1 for AMD Family 1Ah Model 02h + * C1 (57238) 2026-03-06 Revision 0.49. 
+ */ + if (ldlat < 128 || ldlat > 2048) + return (-1); + ctl |= IBS_OP_CTL_LDLAT_TO_CTL(ldlat); + ctl |= IBS_OP_CTL_L3MISSONLY | IBS_OP_CTL_LATFLTEN; + } else if (KWMATCH(p, "randomize")) { + ctl |= IBS_OP_CTL_COUNTERCONTROL; + } else { return (-1); + } + } - pmc_config->pm_md.pm_ibs.ibs_ctl |= ctl; - } else { + if (pmc_config->pm_count < IBS_OP_MIN_RATE || + pmc_config->pm_count > IBS_OP_MAX_RATE) return (-1); - } + + ctl |= IBS_OP_INTERVAL_TO_CTL(pmc_config->pm_count); } + + pmc_config->pm_md.pm_ibs.ibs_ctl |= ctl; + return (0); } diff --git a/lib/libpmc/pmc.3 b/lib/libpmc/pmc.3 index 9a5b599759ff..cb28e0b786b9 100644 --- a/lib/libpmc/pmc.3 +++ b/lib/libpmc/pmc.3 @@ -224,6 +224,11 @@ performance measurement architecture version 2 and later. Programmable hardware counters present in CPUs conforming to the .Tn Intel performance measurement architecture version 1 and later. +.It Li PMC_CLASS_IBS +.Tn AMD +Instruction Based Sampling (IBS) counters present in +.Tn AMD +Family 10h and above. .It Li PMC_CLASS_K8 Programmable hardware counters present in .Tn "AMD Athlon64" @@ -491,6 +496,7 @@ following manual pages: .It Em "PMC Class" Ta Em "Manual Page" .It Li PMC_CLASS_IAF Ta Xr pmc.iaf 3 .It Li PMC_CLASS_IAP Ta Xr pmc.atom 3 , Xr pmc.core 3 , Xr pmc.core2 3 +.It Li PMC_CLASS_IBS Ta Xr pmc.ibs 3 .It Li PMC_CLASS_K8 Ta Xr pmc.amd 3 .It Li PMC_CLASS_TSC Ta Xr pmc.tsc 3 .El @@ -542,6 +548,7 @@ Doing otherwise is unsupported. .Xr pmc.haswelluc 3 , .Xr pmc.haswellxeon 3 , .Xr pmc.iaf 3 , +.Xr pmc.ibs 3 , .Xr pmc.ivybridge 3 , .Xr pmc.ivybridgexeon 3 , .Xr pmc.sandybridge 3 , diff --git a/lib/libpmc/pmc.amd.3 b/lib/libpmc/pmc.amd.3 index 047b31aa78bb..75c6331b000f 100644 --- a/lib/libpmc/pmc.amd.3 +++ b/lib/libpmc/pmc.amd.3 @@ -777,6 +777,7 @@ and the underlying hardware events used. 
.Xr pmc.core 3 , .Xr pmc.core2 3 , .Xr pmc.iaf 3 , +.Xr pmc.ibs 3 , .Xr pmc.soft 3 , .Xr pmc.tsc 3 , .Xr pmclog 3 , diff --git a/lib/libpmc/pmc.core.3 b/lib/libpmc/pmc.core.3 index b4fa9ab661a4..4c41e7c7ad3b 100644 --- a/lib/libpmc/pmc.core.3 +++ b/lib/libpmc/pmc.core.3 @@ -786,6 +786,7 @@ may not count some transitions. .Xr pmc.atom 3 , .Xr pmc.core2 3 , .Xr pmc.iaf 3 , +.Xr pmc.ibs 3 , .Xr pmc.soft 3 , .Xr pmc.tsc 3 , .Xr pmclog 3 , diff --git a/lib/libpmc/pmc.core2.3 b/lib/libpmc/pmc.core2.3 index 86604b7ff16c..7e544fad43b6 100644 --- a/lib/libpmc/pmc.core2.3 +++ b/lib/libpmc/pmc.core2.3 @@ -1101,6 +1101,7 @@ and the underlying hardware events used. .Xr pmc.atom 3 , .Xr pmc.core 3 , .Xr pmc.iaf 3 , +.Xr pmc.ibs 3 , .Xr pmc.soft 3 , .Xr pmc.tsc 3 , .Xr pmc_cpuinfo 3 , diff --git a/lib/libpmc/pmc.iaf.3 b/lib/libpmc/pmc.iaf.3 index eaf45db140f5..c3528e472103 100644 --- a/lib/libpmc/pmc.iaf.3 +++ b/lib/libpmc/pmc.iaf.3 @@ -125,6 +125,7 @@ CPU, use the event specifier .Xr pmc.atom 3 , .Xr pmc.core 3 , .Xr pmc.core2 3 , +.Xr pmc.ibs 3 , .Xr pmc.soft 3 , .Xr pmc.tsc 3 , .Xr pmc_cpuinfo 3 , diff --git a/lib/libpmc/pmc.ibs.3 b/lib/libpmc/pmc.ibs.3 new file mode 100644 index 000000000000..69b90b84556c --- /dev/null +++ b/lib/libpmc/pmc.ibs.3 @@ -0,0 +1,150 @@ +.\" Copyright (c) 2016 Ali Mashtizadeh. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd March 15, 2026 +.Dt PMC.IBS 3 +.Os +.Sh NAME +.Nm pmc.ibs +.Nd Instruction Based Sampling for +.Tn AMD +CPUs +.Sh LIBRARY +.Lb libpmc +.Sh SYNOPSIS +.In pmc.h +.Sh DESCRIPTION +AMD Instruction Based Sampling (IBS) was introduced with the K10 family of +CPUs. +AMD IBS is an alternative approach that samples instructions or micro-ops and +provides a per-instruction or micro-op breakdown of the sources of stalls. +.Pp +Unlike traditional counters, IBS can only be used in the sampling mode and +provides extra data embedded in the callchain. +IBS events set the PMC_F_MULTIPART flag to signify multiple payload types are +contained in the callchain. +The first 8 bytes of the callchain contain four tuples with a one byte type and +a one byte length field. +The regular PMC callchain can be found following the multipart payload. +.Pp +IBS only provides two events that analyze instruction fetches and instruction +execution. +The instruction fetch (ibs-fetch) event provides data on the processor +front-end including reporting instruction cache and TLB events. 
+The instruction execution (ibs-op) event provides data on the processor +execution including reporting mispredictions, data cache and TLB events. +You should use the AMD PMC counters documented in +.Xr pmc.amd 3 +to analyze stalls relating to instruction issue including reservation contention. +.Pp +A guide to analyzing IBS data is provided in Appendix G of the +.Rs +.%B "Software Optimization Guide for AMD Family 10h and 12h Processors" +.%N "Publication No. 40546" +.%D "February 2011" +.%Q "Advanced Micro Devices, Inc." +.Re +A more recent document should be used for decoding all of the flags and fields +in the IBS data. +For example, see the AMD Zen 5 documentation +.Rs +.%B "Processor Programming Reference (PPR) for AMD Family 1Ah Model 02h" +.%N "Publication No. 57238" +.%D "March 6, 2026" +.%Q "Advanced Micro Devices, Inc." +.Re +.Ss PMC Features +AMD IBS supports the following capabilities. +.Bl -column "PMC_CAP_INTERRUPT" "Support" +.It Em Capability Ta Em Support +.It PMC_CAP_CASCADE Ta \&No +.It PMC_CAP_EDGE Ta Yes +.It PMC_CAP_INTERRUPT Ta Yes +.It PMC_CAP_INVERT Ta \&No +.It PMC_CAP_READ Ta \&No +.It PMC_CAP_PRECISE Ta Yes +.It PMC_CAP_SYSTEM Ta Yes +.It PMC_CAP_TAGGING Ta \&No +.It PMC_CAP_THRESHOLD Ta \&No +.It PMC_CAP_USER Ta \&No +.It PMC_CAP_WRITE Ta \&No +.El +.Pp +By default AMD IBS enables the edge, interrupt, system and precise flags. +.Ss Event Qualifiers +Event specifiers for AMD IBS can have the following optional +qualifiers: +.Bl -tag -width "ldlat=value" +.It Li l3miss +Configure IBS to only sample if an L3 cache miss occurred. +.It Li ldlat= Ns Ar value +Configure the counter to only sample events with load latencies above +.Ar value . +IBS only supports filtering latencies that are a multiple of 128 and between +128 and 2048. +Load latency filtering can only be used with ibs-op events and implies the +l3miss qualifier. +.It Li randomize +Randomize the sampling rate. 
+.El +.Ss AMD IBS Event Specifiers +The IBS event class provides only two event specifiers: +.Bl -tag -width indent +.It Li ibs-fetch Xo +.Op ,l3miss +.Op ,randomize +.Xc +Collect performance samples during instruction fetch. +The +.Ar randomize +qualifier randomly sets the bottom four bits of the sample rate. +.It Li ibs-op Xo +.Op ,l3miss +.Op ,ldlat= Ns Ar ldlat +.Op ,randomize +.Xc +Collect performance samples during instruction execution. +The +.Ar randomize +qualifier, upon reaching the maximum count, restarts the count with a value +between 1 and 127. +.El +.Pp +You may collect both events at the same time. +N.B. AMD discouraged doing so with certain older processors, stating that +sampling both simultaneously perturbs the results. +Please see the processor programming reference for your specific processor. +.Sh SEE ALSO +.Xr pmc 3 , +.Xr pmc.amd 3 , +.Xr pmc.soft 3 , +.Xr pmc.tsc 3 , +.Xr pmclog 3 , +.Xr hwpmc 4 +.Sh HISTORY +AMD IBS support was first introduced in +.Fx 16.0 . +.Sh AUTHORS +AMD IBS support and this manual page were written by +.An Ali Mashtizadeh Aq Mt ali@mashtizadeh.com +and sponsored by Netflix, Inc. diff --git a/lib/libpmc/pmc.soft.3 b/lib/libpmc/pmc.soft.3 index 08d5af63d02d..f58b3e8ffa26 100644 --- a/lib/libpmc/pmc.soft.3 +++ b/lib/libpmc/pmc.soft.3 @@ -90,6 +90,7 @@ Write page fault. .Xr pmc.corei7 3 , .Xr pmc.corei7uc 3 , .Xr pmc.iaf 3 , +.Xr pmc.ibs 3 , .Xr pmc.tsc 3 , .Xr pmc.ucf 3 , .Xr pmc.westmereuc 3 , diff --git a/lib/libpmc/pmc.tsc.3 b/lib/libpmc/pmc.tsc.3 index 4834d897f90c..73e2377df0c7 100644 --- a/lib/libpmc/pmc.tsc.3 +++ b/lib/libpmc/pmc.tsc.3 @@ -62,6 +62,7 @@ maps to the TSC. .Xr pmc.core 3 , .Xr pmc.core2 3 , .Xr pmc.iaf 3 , +.Xr pmc.ibs 3 , .Xr pmc.soft 3 , .Xr pmclog 3 , .Xr hwpmc 4 diff --git a/lib/libpmc/pmc.ucf.3 b/lib/libpmc/pmc.ucf.3 index a7cea6bb57f9..37ee0f87a951 100644 --- a/lib/libpmc/pmc.ucf.3 +++ b/lib/libpmc/pmc.ucf.3 @@ -88,6 +88,7 @@ offset C0H under device number 0 and Function 0. 
.Xr pmc.corei7 3 , .Xr pmc.corei7uc 3 , .Xr pmc.iaf 3 , +.Xr pmc.ibs 3 , .Xr pmc.soft 3 , .Xr pmc.tsc 3 , .Xr pmc.westmere 3 , diff --git a/sys/dev/hwpmc/hwpmc_ibs.h b/sys/dev/hwpmc/hwpmc_ibs.h index 4449b44c8368..01fc88648558 100644 --- a/sys/dev/hwpmc/hwpmc_ibs.h +++ b/sys/dev/hwpmc/hwpmc_ibs.h @@ -67,6 +67,18 @@ #define IBS_CTL_LVTOFFSETVALID (1ULL << 8) #define IBS_CTL_LVTOFFSETMASK 0x0000000F +/* + * The minimum sampling rate was selected to match the default used by other + * counters that was also found to be experimentally stable by providing enough + * time between consecutive NMIs. The maximum sample rate is determined by + * setting all available counter bits, i.e., all available bits except the + * bottom four that are zero extended. + */ +#define IBS_FETCH_MIN_RATE 65536 +#define IBS_FETCH_MAX_RATE 1048560 +#define IBS_OP_MIN_RATE 65536 +#define IBS_OP_MAX_RATE 134217712 + /* IBS Fetch Control */ #define IBS_FETCH_CTL 0xC0011030 /* IBS Fetch Control */ #define IBS_FETCH_CTL_L3MISS (1ULL << 61) /* L3 Cache Miss */ @@ -82,7 +94,8 @@ #define IBS_FETCH_CTL_ENABLE (1ULL << 48) /* Enable */ #define IBS_FETCH_CTL_MAXCNTMASK 0x0000FFFFULL -#define IBS_FETCH_CTL_TO_LAT(_c) ((_c >> 32) & 0x0000FFFF) +#define IBS_FETCH_INTERVAL_TO_CTL(_c) (((_c) >> 4) & 0x0000FFFF) +#define IBS_FETCH_CTL_TO_LAT(_c) (((_c) >> 32) & 0x0000FFFF) #define IBS_FETCH_LINADDR 0xC0011031 /* Fetch Linear Address */ #define IBS_FETCH_PHYSADDR 0xC0011032 /* Fetch Physical Address */ @@ -95,12 +108,16 @@ /* IBS Execution Control */ #define IBS_OP_CTL 0xC0011033 /* IBS Execution Control */ +#define IBS_OP_CTL_LATFLTEN (1ULL << 63) /* Load Latency Filtering */ #define IBS_OP_CTL_COUNTERCONTROL (1ULL << 19) /* Counter Control */ #define IBS_OP_CTL_VALID (1ULL << 18) /* Valid */ #define IBS_OP_CTL_ENABLE (1ULL << 17) /* Enable */ #define IBS_OP_CTL_L3MISSONLY (1ULL << 16) /* L3 Miss Filtering */ #define IBS_OP_CTL_MAXCNTMASK 0x0000FFFFULL +#define IBS_OP_CTL_LDLAT_TO_CTL(_c) ((((ldlat) >> 7) - 1) 
<< 59) +#define IBS_OP_INTERVAL_TO_CTL(_c) ((((_c) >> 4) & 0x0000FFFFULL) | ((_c) & 0x07F00000)) + #define IBS_OP_RIP 0xC0011034 /* IBS Op RIP */ #define IBS_OP_DATA 0xC0011035 /* IBS Op Data */ #define IBS_OP_DATA_RIPINVALID (1ULL << 38) /* RIP Invalid */home | help
Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69c1a0f7.47fb8.263fa653>
