Date: Mon, 23 Mar 2026 22:12:05 +0100 From: Oliver Pinter <oliver.pntr@gmail.com> To: Mitchell Horne <mhorne@freebsd.org> Cc: "src-committers@freebsd.org" <src-committers@freebsd.org>, "dev-commits-src-all@freebsd.org" <dev-commits-src-all@freebsd.org>, "dev-commits-src-main@freebsd.org" <dev-commits-src-main@freebsd.org>, Ali Mashtizadeh <mashti@uwaterloo.ca> Subject: Re: git: df47355fae72 - main - libpmc: Add support for IBS qualifiers Message-ID: <CAPjTQNG=xAVKitXvuj7Su_yQhrRpHYjRgDW=k3uiSfyah01EdA@mail.gmail.com> In-Reply-To: <69c1a0f7.47fb8.263fa653@gitrepo.freebsd.org>
index | next in thread | previous in thread | raw e-mail
[-- Attachment #1 --] On Monday, March 23, 2026, Mitchell Horne <mhorne@freebsd.org> wrote: > The branch main has been updated by mhorne: > > URL: https://cgit.FreeBSD.org/src/commit/?id= > df47355fae720fd8f63f36a50c8933f8342483d2 > > commit df47355fae720fd8f63f36a50c8933f8342483d2 > Author: Ali Mashtizadeh <mashti@uwaterloo.ca> > AuthorDate: 2026-03-18 04:27:09 +0000 > Commit: Mitchell Horne <mhorne@FreeBSD.org> > CommitDate: 2026-03-23 20:21:28 +0000 > > libpmc: Add support for IBS qualifiers > > Add support to libpmc for parsing the IBS qualifiers and computing the > ctl register value as a function of the qualifiers and the sample rate. > This includes all of the flags available up to AMD Zen 5. Along side > these user facing changes I included the documentation for AMD IBS. > > Reviewed by: mhorne > Sponsored by: Netflix > Pull Request: https://github.com/freebsd/freebsd-src/pull/2081 > --- > lib/libpmc/Makefile | 1 + > lib/libpmc/libpmc.c | 71 ++++++++++++++++++---- > lib/libpmc/pmc.3 | 7 +++ > lib/libpmc/pmc.amd.3 | 1 + > lib/libpmc/pmc.core.3 | 1 + > lib/libpmc/pmc.core2.3 | 1 + > lib/libpmc/pmc.iaf.3 | 1 + > lib/libpmc/pmc.ibs.3 | 150 ++++++++++++++++++++++++++++++ > ++++++++++++++++ > lib/libpmc/pmc.soft.3 | 1 + > lib/libpmc/pmc.tsc.3 | 1 + > lib/libpmc/pmc.ucf.3 | 1 + > sys/dev/hwpmc/hwpmc_ibs.h | 19 +++++- > 12 files changed, 244 insertions(+), 11 deletions(-) > > diff --git a/lib/libpmc/Makefile b/lib/libpmc/Makefile > index 590f719ebff4..442efdc3d9c0 100644 > --- a/lib/libpmc/Makefile > +++ b/lib/libpmc/Makefile > @@ -74,6 +74,7 @@ MAN+= pmc.haswell.3 > MAN+= pmc.haswelluc.3 > MAN+= pmc.haswellxeon.3 > MAN+= pmc.iaf.3 > +MAN+= pmc.ibs.3 > MAN+= pmc.ivybridge.3 > MAN+= pmc.ivybridgexeon.3 > MAN+= pmc.sandybridge.3 > diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c > index ceba40aa7b39..ebb642e8d16b 100644 > --- a/lib/libpmc/libpmc.c > +++ b/lib/libpmc/libpmc.c > @@ -696,7 +696,7 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec, > struct pmc_op_pmcallocate *pmc_config) > { > char *e, *p, *q; > - uint64_t ctl; > + uint64_t ctl, ldlat; > > pmc_config->pm_caps |= > (PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_PRECISE); > @@ -714,23 +714,74 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec, > return (-1); > } > > + /* IBS only supports sampling mode */ > + if (!PMC_IS_SAMPLING_MODE(pmc_config->pm_mode)) { > + return (-1); > + } > + > /* parse parameters */ > - while ((p = strsep(&ctrspec, ",")) != NULL) { > - if (KWPREFIXMATCH(p, "ctl=")) { > - q = strchr(p, '='); > - if (*++q == '\0') /* skip '=' */ > + ctl = 0; > + if (pe == PMC_EV_IBS_FETCH) { > + while ((p = strsep(&ctrspec, ",")) != NULL) { > + if (KWMATCH(p, "l3miss")) { > + ctl |= IBS_FETCH_CTL_L3MISSONLY; > + } else if (KWMATCH(p, "randomize")) { > + ctl |= IBS_FETCH_CTL_RANDOMIZE; > + } else { > return (-1); > + } > + } > > - ctl = strtoull(q, &e, 0); > - if (e == q || *e != '\0') > + if (pmc_config->pm_count < IBS_FETCH_MIN_RATE || > + pmc_config->pm_count > IBS_FETCH_MAX_RATE) > + return (-1); > + > + ctl |= IBS_FETCH_INTERVAL_TO_CTL(pmc_config->pm_count); > + } else { > + while ((p = strsep(&ctrspec, ",")) != NULL) { > + if (KWMATCH(p, "l3miss")) { > + ctl |= IBS_OP_CTL_L3MISSONLY; > + } else if (KWPREFIXMATCH(p, "ldlat=")) { > + q = strchr(p, '='); > + if (*++q == '\0') /* skip '=' */ > + return (-1); > + > + ldlat = strtoull(q, &e, 0); > + if (e == q || *e != '\0') > + return (-1); > + > + /* > + * IBS load latency filtering requires the > + * latency to be a multiple of 128 and > between > + * 128 and 2048. The latency is stored in > the > + * IbsOpLatThrsh field, which only contains > + * four bits so the processor computes > + * (IbsOpLatThrsh+1)*128 as the value. > + * > + * AMD PPR Vol 1 for AMD Family 1Ah Model > 02h > + * C1 (57238) 2026-03-06 Revision 0.49. > + */ > + if (ldlat < 128 || ldlat > 2048) > + return (-1); > + ctl |= IBS_OP_CTL_LDLAT_TO_CTL(ldlat); > + ctl |= IBS_OP_CTL_L3MISSONLY | > IBS_OP_CTL_LATFLTEN; > + } else if (KWMATCH(p, "randomize")) { > + ctl |= IBS_OP_CTL_COUNTERCONTROL; > + } else { > return (-1); > + } > + } > > - pmc_config->pm_md.pm_ibs.ibs_ctl |= ctl; > - } else { > + if (pmc_config->pm_count < IBS_OP_MIN_RATE || > + pmc_config->pm_count > IBS_OP_MAX_RATE) > return (-1); > - } > + > + ctl |= IBS_OP_INTERVAL_TO_CTL(pmc_config->pm_count); > } > > + > + pmc_config->pm_md.pm_ibs.ibs_ctl |= ctl; > + > return (0); > } > > diff --git a/lib/libpmc/pmc.3 b/lib/libpmc/pmc.3 > index 9a5b599759ff..cb28e0b786b9 100644 > --- a/lib/libpmc/pmc.3 > +++ b/lib/libpmc/pmc.3 > @@ -224,6 +224,11 @@ performance measurement architecture version 2 and > later. > Programmable hardware counters present in CPUs conforming to the > .Tn Intel > performance measurement architecture version 1 and later. > +.It Li PMC_CLASS_IBS > +.Tn AMD > +Instruction Based Sampling (IBS) counters present in > +.Tn AMD > +Family 10h and above. > .It Li PMC_CLASS_K8 > Programmable hardware counters present in > .Tn "AMD Athlon64" > @@ -491,6 +496,7 @@ following manual pages: > .It Em "PMC Class" Ta Em "Manual Page" > .It Li PMC_CLASS_IAF Ta Xr pmc.iaf 3 > .It Li PMC_CLASS_IAP Ta Xr pmc.atom 3 , Xr pmc.core 3 , Xr pmc.core2 3 > +.It Li PMC_CLASS_IBS Ta Xr pmc.ibs 3 > .It Li PMC_CLASS_K8 Ta Xr pmc.amd 3 > .It Li PMC_CLASS_TSC Ta Xr pmc.tsc 3 > .El > @@ -542,6 +548,7 @@ Doing otherwise is unsupported. > .Xr pmc.haswelluc 3 , > .Xr pmc.haswellxeon 3 , > .Xr pmc.iaf 3 , > +.Xr pmc.ibs 3 , > .Xr pmc.ivybridge 3 , > .Xr pmc.ivybridgexeon 3 , > .Xr pmc.sandybridge 3 , > diff --git a/lib/libpmc/pmc.amd.3 b/lib/libpmc/pmc.amd.3 > index 047b31aa78bb..75c6331b000f 100644 > --- a/lib/libpmc/pmc.amd.3 > +++ b/lib/libpmc/pmc.amd.3 > @@ -777,6 +777,7 @@ and the underlying hardware events used. > .Xr pmc.core 3 , > .Xr pmc.core2 3 , > .Xr pmc.iaf 3 , > +.Xr pmc.ibs 3 , > .Xr pmc.soft 3 , > .Xr pmc.tsc 3 , > .Xr pmclog 3 , > diff --git a/lib/libpmc/pmc.core.3 b/lib/libpmc/pmc.core.3 > index b4fa9ab661a4..4c41e7c7ad3b 100644 > --- a/lib/libpmc/pmc.core.3 > +++ b/lib/libpmc/pmc.core.3 > @@ -786,6 +786,7 @@ may not count some transitions. > .Xr pmc.atom 3 , > .Xr pmc.core2 3 , > .Xr pmc.iaf 3 , > +.Xr pmc.ibs 3 , > .Xr pmc.soft 3 , > .Xr pmc.tsc 3 , > .Xr pmclog 3 , > diff --git a/lib/libpmc/pmc.core2.3 b/lib/libpmc/pmc.core2.3 > index 86604b7ff16c..7e544fad43b6 100644 > --- a/lib/libpmc/pmc.core2.3 > +++ b/lib/libpmc/pmc.core2.3 > @@ -1101,6 +1101,7 @@ and the underlying hardware events used. > .Xr pmc.atom 3 , > .Xr pmc.core 3 , > .Xr pmc.iaf 3 , > +.Xr pmc.ibs 3 , > .Xr pmc.soft 3 , > .Xr pmc.tsc 3 , > .Xr pmc_cpuinfo 3 , > diff --git a/lib/libpmc/pmc.iaf.3 b/lib/libpmc/pmc.iaf.3 > index eaf45db140f5..c3528e472103 100644 > --- a/lib/libpmc/pmc.iaf.3 > +++ b/lib/libpmc/pmc.iaf.3 > @@ -125,6 +125,7 @@ CPU, use the event specifier > .Xr pmc.atom 3 , > .Xr pmc.core 3 , > .Xr pmc.core2 3 , > +.Xr pmc.ibs 3 , > .Xr pmc.soft 3 , > .Xr pmc.tsc 3 , > .Xr pmc_cpuinfo 3 , > diff --git a/lib/libpmc/pmc.ibs.3 b/lib/libpmc/pmc.ibs.3 > new file mode 100644 > index 000000000000..69b90b84556c > --- /dev/null > +++ b/lib/libpmc/pmc.ibs.3 > @@ -0,0 +1,150 @@ > +.\" Copyright (c) 2016 Ali Mashtizadeh. All rights reserved. Isn't this 2026? > +.\" > +.\" Redistribution and use in source and binary forms, with or without > +.\" modification, are permitted provided that the following conditions > +.\" are met: > +.\" 1. Redistributions of source code must retain the above copyright > +.\" notice, this list of conditions and the following disclaimer. > +.\" 2. Redistributions in binary form must reproduce the above copyright > +.\" notice, this list of conditions and the following disclaimer in the > +.\" documentation and/or other materials provided with the > distribution. > +.\" > +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND > +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE > +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR > PURPOSE > +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE > LIABLE > +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR > CONSEQUENTIAL > +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE > GOODS > +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) > +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, > STRICT > +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY > WAY > +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF > +.\" SUCH DAMAGE. > +.\" > +.Dd March 15, 2026 > +.Dt PMC.IBS 3 > +.Os > +.Sh NAME > +.Nm pmc.ibs > +.Nd Instruction Based Sampling for > +.Tn AMD > +CPUs > +.Sh LIBRARY > +.Lb libpmc > +.Sh SYNOPSIS > +.In pmc.h > +.Sh DESCRIPTION > +AMD Instruction Based Sampling (IBS) was introduced with the K10 family of > +CPUs. > +AMD IBS is an alternative approach that samples instructions or micro-ops > and > +provides a per-instruction or micro-op breakdown of the sources of stalls. > +.Pp > +Unlike traditional counters, IBS can only be used in the sampling mode and > +provides extra data embedded in the callchain. > +IBS events set the PMC_F_MULTIPART flag to signify multiple payload types > are > +contained in the callchain. > +The first 8 bytes of the callchain contain four tuples with a one byte > type and > +a one byte length field. > +The regular PMC callchain can be found following the multipart payload. > +.Pp > +IBS only provides two events that analyze instruction fetches and > instruction > +execution. > +The instruction fetch (ibs-fetch) event provides data on the processor > +front-end including reporting instruction cache and TLB events. > +The instruction execution (ibs-op) event provides data on the processor > +execution including reporting mispredictions, data cache and TLB events. > +You should use the AMD PMC counters documented in > +.Xr pmc.amd 3 > +to analyze stalls relating instruction issue including reservation > contention. > +.Pp > +A guide to analyzing IBS data is provided in Appendix G of the > +.Rs > +.%B "Software Optimization Guide for AMD Family 10h and 12h Processors" > +.%N "Publication No. 40546" > +.%D "February 2011" > +.%Q "Advanced Micro Devices, Inc." > +.Re > +A more recent document should be used for decoding all of the flags and > fields > +in the IBS data. > +For example, see the AMD Zen 5 documentation > +.Rs > +.%B "Processor Programming Reference (PPR) for AMD Family 1Ah Model 02h" > +.%N "Publication No. 57238" > +.%D "March 6, 2026" > +.%Q "Advanced Micro Devices, Inc." > +.Re > +.Ss PMC Features > +AMD IBS supports the following capabilities. > +.Bl -column "PMC_CAP_INTERRUPT" "Support" > +.It Em Capability Ta Em Support > +.It PMC_CAP_CASCADE Ta \&No > +.It PMC_CAP_EDGE Ta Yes > +.It PMC_CAP_INTERRUPT Ta Yes > +.It PMC_CAP_INVERT Ta \&No > +.It PMC_CAP_READ Ta \&No > +.It PMC_CAP_PRECISE Ta Yes > +.It PMC_CAP_SYSTEM Ta Yes > +.It PMC_CAP_TAGGING Ta \&No > +.It PMC_CAP_THRESHOLD Ta \&No > +.It PMC_CAP_USER Ta \&No > +.It PMC_CAP_WRITE Ta \&No > +.El > +.Pp > +By default AMD IBS enables the edge, interrupt, system and precise flags. > +.Ss Event Qualifiers > +Event specifiers for AMD IBS can have the following optional > +qualifiers: > +.Bl -tag -width "ldlat=value" > +.It Li l3miss > +Configure IBS to only sample if an l3miss occurred. > +.It Li ldlat= Ns Ar value > +Configure the counter to only sample events with load latencies above > +.Ar ldlat . > +IBS only supports filtering latencies that are a multiple of 128 and > between > +128 and 2048. > +Load latency filtering can only be used with ibs-op events and imply the > +l3miss qualifier. > +.It Li randomize > +Randomize the sampling rate. > +.El > +.Ss AMD IBS Events Specifiers > +The IBS event class provides only two event specifiers: > +.Bl -tag -width indent > +.It Li ibs-fetch Xo > +.Op ,l3miss > +.Op ,randomize > +.Xc > +Collect performance samples during instruction fetch. > +The > +.Ar randomize > +qualifier randomly sets the bottom four bits of the sample rate. > +.It Li ibs-op Xo > +.Op ,l3miss > +.Op ,ldlat= Ns Ar ldlat > +.Op ,randomize > +.Xc > +Collect performance samples during instruction execution. > +The > +.Ar randomize > +qualifier, upon reaching the maximum count, restarts the count with a > value > +between 1 and 127. > +.El > +.Pp > +You may collect both events at the same time. > +N.B. AMD discouraged doing so with certain older processors, stating that > +sampling both simultaneously perturbs the results. > +Please see the processor programming reference for your specific > processor. > +.Sh SEE ALSO > +.Xr pmc 3 , > +.Xr pmc.amd 3 , > +.Xr pmc.soft 3 , > +.Xr pmc.tsc 3 , > +.Xr pmclog 3 , > +.Xr hwpmc 4 > +.Sh HISTORY > +AMD IBS support was first introduced in > +.Fx 16.0 . > +.Sh AUTHORS > +AMD IBS support and this manual page were written > +.An Ali Mashtizadeh Aq Mt ali@mashtizadeh.com > +and sponsored by Netflix, Inc. > diff --git a/lib/libpmc/pmc.soft.3 b/lib/libpmc/pmc.soft.3 > index 08d5af63d02d..f58b3e8ffa26 100644 > --- a/lib/libpmc/pmc.soft.3 > +++ b/lib/libpmc/pmc.soft.3 > @@ -90,6 +90,7 @@ Write page fault. > .Xr pmc.corei7 3 , > .Xr pmc.corei7uc 3 , > .Xr pmc.iaf 3 , > +.Xr pmc.ibs 3 , > .Xr pmc.tsc 3 , > .Xr pmc.ucf 3 , > .Xr pmc.westmereuc 3 , > diff --git a/lib/libpmc/pmc.tsc.3 b/lib/libpmc/pmc.tsc.3 > index 4834d897f90c..73e2377df0c7 100644 > --- a/lib/libpmc/pmc.tsc.3 > +++ b/lib/libpmc/pmc.tsc.3 > @@ -62,6 +62,7 @@ maps to the TSC. > .Xr pmc.core 3 , > .Xr pmc.core2 3 , > .Xr pmc.iaf 3 , > +.Xr pmc.ibs 3 , > .Xr pmc.soft 3 , > .Xr pmclog 3 , > .Xr hwpmc 4 > diff --git a/lib/libpmc/pmc.ucf.3 b/lib/libpmc/pmc.ucf.3 > index a7cea6bb57f9..37ee0f87a951 100644 > --- a/lib/libpmc/pmc.ucf.3 > +++ b/lib/libpmc/pmc.ucf.3 > @@ -88,6 +88,7 @@ offset C0H under device number 0 and Function 0. > .Xr pmc.corei7 3 , > .Xr pmc.corei7uc 3 , > .Xr pmc.iaf 3 , > +.Xr pmc.ibs 3 , > .Xr pmc.soft 3 , > .Xr pmc.tsc 3 , > .Xr pmc.westmere 3 , > diff --git a/sys/dev/hwpmc/hwpmc_ibs.h b/sys/dev/hwpmc/hwpmc_ibs.h > index 4449b44c8368..01fc88648558 100644 > --- a/sys/dev/hwpmc/hwpmc_ibs.h > +++ b/sys/dev/hwpmc/hwpmc_ibs.h > @@ -67,6 +67,18 @@ > #define IBS_CTL_LVTOFFSETVALID (1ULL << 8) > #define IBS_CTL_LVTOFFSETMASK 0x0000000F > > +/* > + * The minimum sampling rate was selected to match the default used by > other > + * counters that was also found to be experimentally stable by providing > enough > + * time between consecutive NMIs. The maximum sample rate is determined > by > + * setting all available counter bits, i.e., all available bits except the > + * bottom four that are zero extended. > + */ > +#define IBS_FETCH_MIN_RATE 65536 > +#define IBS_FETCH_MAX_RATE 1048560 > +#define IBS_OP_MIN_RATE 65536 > +#define IBS_OP_MAX_RATE 134217712 > + > /* IBS Fetch Control */ > #define IBS_FETCH_CTL 0xC0011030 /* IBS Fetch Control */ > #define IBS_FETCH_CTL_L3MISS (1ULL << 61) /* L3 Cache Miss */ > @@ -82,7 +94,8 @@ > #define IBS_FETCH_CTL_ENABLE (1ULL << 48) /* Enable */ > #define IBS_FETCH_CTL_MAXCNTMASK 0x0000FFFFULL > > -#define IBS_FETCH_CTL_TO_LAT(_c) ((_c >> 32) & 0x0000FFFF) > +#define IBS_FETCH_INTERVAL_TO_CTL(_c) (((_c) >> 4) & 0x0000FFFF) > +#define IBS_FETCH_CTL_TO_LAT(_c) (((_c) >> 32) & 0x0000FFFF) > > #define IBS_FETCH_LINADDR 0xC0011031 /* Fetch Linear Address > */ > #define IBS_FETCH_PHYSADDR 0xC0011032 /* Fetch Physical > Address */ > @@ -95,12 +108,16 @@ > > /* IBS Execution Control */ > #define IBS_OP_CTL 0xC0011033 /* IBS Execution > Control */ > +#define IBS_OP_CTL_LATFLTEN (1ULL << 63) /* Load Latency > Filtering */ > #define IBS_OP_CTL_COUNTERCONTROL (1ULL << 19) /* Counter Control */ > #define IBS_OP_CTL_VALID (1ULL << 18) /* Valid */ > #define IBS_OP_CTL_ENABLE (1ULL << 17) /* Enable */ > #define IBS_OP_CTL_L3MISSONLY (1ULL << 16) /* L3 Miss Filtering > */ > #define IBS_OP_CTL_MAXCNTMASK 0x0000FFFFULL > > +#define IBS_OP_CTL_LDLAT_TO_CTL(_c) ((((ldlat) >> 7) - 1) << 59) > +#define IBS_OP_INTERVAL_TO_CTL(_c) ((((_c) >> 4) & 0x0000FFFFULL) | > ((_c) & 0x07F00000)) > + > #define IBS_OP_RIP 0xC0011034 /* IBS Op RIP */ > #define IBS_OP_DATA 0xC0011035 /* IBS Op Data */ > #define IBS_OP_DATA_RIPINVALID (1ULL << 38) /* RIP Invalid */ > > [-- Attachment #2 --] <br><br>On Monday, March 23, 2026, Mitchell Horne <<a href="mailto:mhorne@freebsd.org">mhorne@freebsd.org</a>> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">The branch main has been updated by mhorne:<br> <br> URL: <a href="https://cgit.FreeBSD.org/src/commit/?id=df47355fae720fd8f63f36a50c8933f8342483d2" target="_blank">https://cgit.FreeBSD.org/src/<wbr>commit/?id=<wbr>df47355fae720fd8f63f36a50c8933<wbr>f8342483d2</a><br> <br> commit df47355fae720fd8f63f36a50c8933<wbr>f8342483d2<br> Author: Ali Mashtizadeh <<a href="mailto:mashti@uwaterloo.ca">mashti@uwaterloo.ca</a>><br> AuthorDate: 2026-03-18 04:27:09 +0000<br> Commit: Mitchell Horne <mhorne@FreeBSD.org><br> CommitDate: 2026-03-23 20:21:28 +0000<br> <br> libpmc: Add support for IBS qualifiers<br> <br> Add support to libpmc for parsing the IBS qualifiers and computing the<br> ctl register value as a function of the qualifiers and the sample rate.<br> This includes all of the flags available up to AMD Zen 5. Along side<br> these user facing changes I included the documentation for AMD IBS.<br> <br> Reviewed by: mhorne<br> Sponsored by: Netflix<br> Pull Request: <a href="https://github.com/freebsd/freebsd-src/pull/2081" target="_blank">https://github.com/freebsd/<wbr>freebsd-src/pull/2081</a><br> ---<br> lib/libpmc/Makefile | 1 +<br> lib/libpmc/libpmc.c | 71 ++++++++++++++++++----<br> lib/libpmc/pmc.3 | 7 +++<br> lib/libpmc/pmc.amd.3 | 1 +<br> lib/libpmc/pmc.core.3 | 1 +<br> lib/libpmc/pmc.core2.3 | 1 +<br> lib/libpmc/pmc.iaf.3 | 1 +<br> lib/libpmc/pmc.ibs.3 | 150 ++++++++++++++++++++++++++++++<wbr>++++++++++++++++<br> lib/libpmc/pmc.soft.3 | 1 +<br> lib/libpmc/pmc.tsc.3 | 1 +<br> lib/libpmc/pmc.ucf.3 | 1 +<br> sys/dev/hwpmc/hwpmc_ibs.h | 19 +++++-<br> 12 files changed, 244 insertions(+), 11 deletions(-)<br> <br> diff --git a/lib/libpmc/Makefile b/lib/libpmc/Makefile<br> index 590f719ebff4..442efdc3d9c0 100644<br> --- a/lib/libpmc/Makefile<br> +++ b/lib/libpmc/Makefile<br> @@ -74,6 +74,7 @@ MAN+= pmc.haswell.3<br> MAN+= pmc.haswelluc.3<br> MAN+= pmc.haswellxeon.3<br> MAN+= pmc.iaf.3<br> +MAN+= pmc.ibs.3<br> MAN+= pmc.ivybridge.3<br> MAN+= pmc.ivybridgexeon.3<br> MAN+= pmc.sandybridge.3<br> diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c<br> index ceba40aa7b39..ebb642e8d16b 100644<br> --- a/lib/libpmc/libpmc.c<br> +++ b/lib/libpmc/libpmc.c<br> @@ -696,7 +696,7 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,<br> struct pmc_op_pmcallocate *pmc_config)<br> {<br> char *e, *p, *q;<br> - uint64_t ctl;<br> + uint64_t ctl, ldlat;<br> <br> pmc_config->pm_caps |=<br> (PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_PRECISE);<br> @@ -714,23 +714,74 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,<br> return (-1);<br> }<br> <br> + /* IBS only supports sampling mode */<br> + if (!PMC_IS_SAMPLING_MODE(pmc_<wbr>config->pm_mode)) {<br> + return (-1);<br> + }<br> +<br> /* parse parameters */<br> - while ((p = strsep(&ctrspec, ",")) != NULL) {<br> - if (KWPREFIXMATCH(p, "ctl=")) {<br> - q = strchr(p, '=');<br> - if (*++q == '\0') /* skip '=' */<br> + ctl = 0;<br> + if (pe == PMC_EV_IBS_FETCH) {<br> + while ((p = strsep(&ctrspec, ",")) != NULL) {<br> + if (KWMATCH(p, "l3miss")) {<br> + ctl |= IBS_FETCH_CTL_L3MISSONLY;<br> + } else if (KWMATCH(p, "randomize")) {<br> + ctl |= IBS_FETCH_CTL_RANDOMIZE;<br> + } else {<br> return (-1);<br> + }<br> + }<br> <br> - ctl = strtoull(q, &e, 0);<br> - if (e == q || *e != '\0')<br> + if (pmc_config->pm_count < IBS_FETCH_MIN_RATE ||<br> + pmc_config->pm_count > IBS_FETCH_MAX_RATE)<br> + return (-1);<br> +<br> + ctl |= IBS_FETCH_INTERVAL_TO_CTL(pmc_<wbr>config->pm_count);<br> + } else {<br> + while ((p = strsep(&ctrspec, ",")) != NULL) {<br> + if (KWMATCH(p, "l3miss")) {<br> + ctl |= IBS_OP_CTL_L3MISSONLY;<br> + } else if (KWPREFIXMATCH(p, "ldlat=")) {<br> + q = strchr(p, '=');<br> + if (*++q == '\0') /* skip '=' */<br> + return (-1);<br> +<br> + ldlat = strtoull(q, &e, 0);<br> + if (e == q || *e != '\0')<br> + return (-1);<br> +<br> + /*<br> + * IBS load latency filtering requires the<br> + * latency to be a multiple of 128 and between<br> + * 128 and 2048. The latency is stored in the<br> + * IbsOpLatThrsh field, which only contains<br> + * four bits so the processor computes<br> + * (IbsOpLatThrsh+1)*128 as the value.<br> + *<br> + * AMD PPR Vol 1 for AMD Family 1Ah Model 02h<br> + * C1 (57238) 2026-03-06 Revision 0.49.<br> + */<br> + if (ldlat < 128 || ldlat > 2048)<br> + return (-1);<br> + ctl |= IBS_OP_CTL_LDLAT_TO_CTL(ldlat)<wbr>;<br> + ctl |= IBS_OP_CTL_L3MISSONLY | IBS_OP_CTL_LATFLTEN;<br> + } else if (KWMATCH(p, "randomize")) {<br> + ctl |= IBS_OP_CTL_COUNTERCONTROL;<br> + } else {<br> return (-1);<br> + }<br> + }<br> <br> - pmc_config->pm_md.pm_ibs.ibs_<wbr>ctl |= ctl;<br> - } else {<br> + if (pmc_config->pm_count < IBS_OP_MIN_RATE ||<br> + pmc_config->pm_count > IBS_OP_MAX_RATE)<br> return (-1);<br> - }<br> +<br> + ctl |= IBS_OP_INTERVAL_TO_CTL(pmc_<wbr>config->pm_count);<br> }<br> <br> +<br> + pmc_config->pm_md.pm_ibs.ibs_<wbr>ctl |= ctl;<br> +<br> return (0);<br> }<br> <br> diff --git a/lib/libpmc/pmc.3 b/lib/libpmc/pmc.3<br> index 9a5b599759ff..cb28e0b786b9 100644<br> --- a/lib/libpmc/pmc.3<br> +++ b/lib/libpmc/pmc.3<br> @@ -224,6 +224,11 @@ performance measurement architecture version 2 and later.<br> Programmable hardware counters present in CPUs conforming to the<br> .Tn Intel<br> performance measurement architecture version 1 and later.<br> +.It Li PMC_CLASS_IBS<br> +.Tn AMD<br> +Instruction Based Sampling (IBS) counters present in<br> +.Tn AMD<br> +Family 10h and above.<br> .It Li PMC_CLASS_K8<br> Programmable hardware counters present in<br> .Tn "AMD Athlon64"<br> @@ -491,6 +496,7 @@ following manual pages:<br> .It Em "PMC Class" Ta Em "Manual Page"<br> .It Li PMC_CLASS_IAF Ta Xr pmc.iaf 3<br> .It Li PMC_CLASS_IAP Ta Xr pmc.atom 3 , Xr pmc.core 3 , Xr pmc.core2 3<br> +.It Li PMC_CLASS_IBS Ta Xr pmc.ibs 3<br> .It Li PMC_CLASS_K8 Ta Xr pmc.amd 3<br> .It Li PMC_CLASS_TSC Ta Xr pmc.tsc 3<br> .El<br> @@ -542,6 +548,7 @@ Doing otherwise is unsupported.<br> .Xr pmc.haswelluc 3 ,<br> .Xr pmc.haswellxeon 3 ,<br> .Xr pmc.iaf 3 ,<br> +.Xr pmc.ibs 3 ,<br> .Xr pmc.ivybridge 3 ,<br> .Xr pmc.ivybridgexeon 3 ,<br> .Xr pmc.sandybridge 3 ,<br> diff --git a/lib/libpmc/pmc.amd.3 b/lib/libpmc/pmc.amd.3<br> index 047b31aa78bb..75c6331b000f 100644<br> --- a/lib/libpmc/pmc.amd.3<br> +++ b/lib/libpmc/pmc.amd.3<br> @@ -777,6 +777,7 @@ and the underlying hardware events used.<br> .Xr pmc.core 3 ,<br> .Xr pmc.core2 3 ,<br> .Xr pmc.iaf 3 ,<br> +.Xr pmc.ibs 3 ,<br> .Xr pmc.soft 3 ,<br> .Xr pmc.tsc 3 ,<br> .Xr pmclog 3 ,<br> diff --git a/lib/libpmc/pmc.core.3 b/lib/libpmc/pmc.core.3<br> index b4fa9ab661a4..4c41e7c7ad3b 100644<br> --- a/lib/libpmc/pmc.core.3<br> +++ b/lib/libpmc/pmc.core.3<br> @@ -786,6 +786,7 @@ may not count some transitions.<br> .Xr pmc.atom 3 ,<br> .Xr pmc.core2 3 ,<br> .Xr pmc.iaf 3 ,<br> +.Xr pmc.ibs 3 ,<br> .Xr pmc.soft 3 ,<br> .Xr pmc.tsc 3 ,<br> .Xr pmclog 3 ,<br> diff --git a/lib/libpmc/pmc.core2.3 b/lib/libpmc/pmc.core2.3<br> index 86604b7ff16c..7e544fad43b6 100644<br> --- a/lib/libpmc/pmc.core2.3<br> +++ b/lib/libpmc/pmc.core2.3<br> @@ -1101,6 +1101,7 @@ and the underlying hardware events used.<br> .Xr pmc.atom 3 ,<br> .Xr pmc.core 3 ,<br> .Xr pmc.iaf 3 ,<br> +.Xr pmc.ibs 3 ,<br> .Xr pmc.soft 3 ,<br> .Xr pmc.tsc 3 ,<br> .Xr pmc_cpuinfo 3 ,<br> diff --git a/lib/libpmc/pmc.iaf.3 b/lib/libpmc/pmc.iaf.3<br> index eaf45db140f5..c3528e472103 100644<br> --- a/lib/libpmc/pmc.iaf.3<br> +++ b/lib/libpmc/pmc.iaf.3<br> @@ -125,6 +125,7 @@ CPU, use the event specifier<br> .Xr pmc.atom 3 ,<br> .Xr pmc.core 3 ,<br> .Xr pmc.core2 3 ,<br> +.Xr pmc.ibs 3 ,<br> .Xr pmc.soft 3 ,<br> .Xr pmc.tsc 3 ,<br> .Xr pmc_cpuinfo 3 ,<br> diff --git a/lib/libpmc/pmc.ibs.3 b/lib/libpmc/pmc.ibs.3<br> new file mode 100644<br> index 000000000000..69b90b84556c<br> --- /dev/null<br> +++ b/lib/libpmc/pmc.ibs.3<br> @@ -0,0 +1,150 @@<br> +.\" Copyright (c) 2016 Ali Mashtizadeh. All rights reserved.</blockquote><div><br></div><div>Isn't this 2026?</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> +.\"<br> +.\" Redistribution and use in source and binary forms, with or without<br> +.\" modification, are permitted provided that the following conditions<br> +.\" are met:<br> +.\" 1. Redistributions of source code must retain the above copyright<br> +.\" notice, this list of conditions and the following disclaimer.<br> +.\" 2. Redistributions in binary form must reproduce the above copyright<br> +.\" notice, this list of conditions and the following disclaimer in the<br> +.\" documentation and/or other materials provided with the distribution.<br> +.\"<br> +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND<br> +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE<br> +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE<br> +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE<br> +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL<br> +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS<br> +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)<br> +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT<br> +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY<br> +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF<br> +.\" SUCH DAMAGE.<br> +.\"<br> +.Dd March 15, 2026<br> +.Dt PMC.IBS 3<br> +.Os<br> +.Sh NAME<br> +.Nm pmc.ibs<br> +.Nd Instruction Based Sampling for<br> +.Tn AMD<br> +CPUs<br> +.Sh LIBRARY<br> +.Lb libpmc<br> +.Sh SYNOPSIS<br> +.In pmc.h<br> +.Sh DESCRIPTION<br> +AMD Instruction Based Sampling (IBS) was introduced with the K10 family of<br> +CPUs.<br> +AMD IBS is an alternative approach that samples instructions or micro-ops and<br> +provides a per-instruction or micro-op breakdown of the sources of stalls.<br> +.Pp<br> +Unlike traditional counters, IBS can only be used in the sampling mode and<br> +provides extra data embedded in the callchain.<br> +IBS events set the PMC_F_MULTIPART flag to signify multiple payload types are<br> +contained in the callchain.<br> +The first 8 bytes of the callchain contain four tuples with a one byte type and<br> +a one byte length field.<br> +The regular PMC callchain can be found following the multipart payload.<br> +.Pp<br> +IBS only provides two events that analyze instruction fetches and instruction<br> +execution.<br> +The instruction fetch (ibs-fetch) event provides data on the processor<br> +front-end including reporting instruction cache and TLB events.<br> +The instruction execution (ibs-op) event provides data on the processor<br> +execution including reporting mispredictions, data cache and TLB events.<br> +You should use the AMD PMC counters documented in<br> +.Xr pmc.amd 3<br> +to analyze stalls relating instruction issue including reservation contention.<br> +.Pp<br> +A guide to analyzing IBS data is provided in Appendix G of the<br> +.Rs<br> +.%B "Software Optimization Guide for AMD Family 10h and 12h Processors"<br> +.%N "Publication No. 40546"<br> +.%D "February 2011"<br> +.%Q "Advanced Micro Devices, Inc."<br> +.Re<br> +A more recent document should be used for decoding all of the flags and fields<br> +in the IBS data.<br> +For example, see the AMD Zen 5 documentation<br> +.Rs<br> +.%B "Processor Programming Reference (PPR) for AMD Family 1Ah Model 02h"<br> +.%N "Publication No. 57238"<br> +.%D "March 6, 2026"<br> +.%Q "Advanced Micro Devices, Inc."<br> +.Re<br> +.Ss PMC Features<br> +AMD IBS supports the following capabilities.<br> +.Bl -column "PMC_CAP_INTERRUPT" "Support"<br> +.It Em Capability Ta Em Support<br> +.It PMC_CAP_CASCADE Ta \&No<br> +.It PMC_CAP_EDGE Ta Yes<br> +.It PMC_CAP_INTERRUPT Ta Yes<br> +.It PMC_CAP_INVERT Ta \&No<br> +.It PMC_CAP_READ Ta \&No<br> +.It PMC_CAP_PRECISE Ta Yes<br> +.It PMC_CAP_SYSTEM Ta Yes<br> +.It PMC_CAP_TAGGING Ta \&No<br> +.It PMC_CAP_THRESHOLD Ta \&No<br> +.It PMC_CAP_USER Ta \&No<br> +.It PMC_CAP_WRITE Ta \&No<br> +.El<br> +.Pp<br> +By default AMD IBS enables the edge, interrupt, system and precise flags.<br> +.Ss Event Qualifiers<br> +Event specifiers for AMD IBS can have the following optional<br> +qualifiers:<br> +.Bl -tag -width "ldlat=value"<br> +.It Li l3miss<br> +Configure IBS to only sample if an l3miss occurred.<br> +.It Li ldlat= Ns Ar value<br> +Configure the counter to only sample events with load latencies above<br> +.Ar ldlat .<br> +IBS only supports filtering latencies that are a multiple of 128 and between<br> +128 and 2048.<br> +Load latency filtering can only be used with ibs-op events and imply the<br> +l3miss qualifier.<br> +.It Li randomize<br> +Randomize the sampling rate.<br> +.El<br> +.Ss AMD IBS Events Specifiers<br> +The IBS event class provides only two event specifiers:<br> +.Bl -tag -width indent<br> +.It Li ibs-fetch Xo<br> +.Op ,l3miss<br> +.Op ,randomize<br> +.Xc<br> +Collect performance samples during instruction fetch.<br> +The<br> +.Ar randomize<br> +qualifier randomly sets the bottom four bits of the sample rate.<br> +.It Li ibs-op Xo<br> +.Op ,l3miss<br> +.Op ,ldlat= Ns Ar ldlat<br> +.Op ,randomize<br> +.Xc<br> +Collect performance samples during instruction execution.<br> +The<br> +.Ar randomize<br> +qualifier, upon reaching the maximum count, restarts the count with a value<br> +between 1 and 127.<br> +.El<br> +.Pp<br> +You may collect both events at the same time.<br> +N.B. AMD discouraged doing so with certain older processors, stating that<br> +sampling both simultaneously perturbs the results.<br> +Please see the processor programming reference for your specific processor.<br> +.Sh SEE ALSO<br> +.Xr pmc 3 ,<br> +.Xr pmc.amd 3 ,<br> +.Xr pmc.soft 3 ,<br> +.Xr pmc.tsc 3 ,<br> +.Xr pmclog 3 ,<br> +.Xr hwpmc 4<br> +.Sh HISTORY<br> +AMD IBS support was first introduced in<br> +.Fx 16.0 .<br> +.Sh AUTHORS<br> +AMD IBS support and this manual page were written<br> +.An Ali Mashtizadeh Aq Mt <a href="mailto:ali@mashtizadeh.com">ali@mashtizadeh.com</a><br> +and sponsored by Netflix, Inc.<br> diff --git a/lib/libpmc/pmc.soft.3 b/lib/libpmc/pmc.soft.3<br> index 08d5af63d02d..f58b3e8ffa26 100644<br> --- a/lib/libpmc/pmc.soft.3<br> +++ b/lib/libpmc/pmc.soft.3<br> @@ -90,6 +90,7 @@ Write page fault.<br> .Xr pmc.corei7 3 ,<br> .Xr pmc.corei7uc 3 ,<br> .Xr pmc.iaf 3 ,<br> +.Xr pmc.ibs 3 ,<br> .Xr pmc.tsc 3 ,<br> .Xr pmc.ucf 3 ,<br> .Xr pmc.westmereuc 3 ,<br> diff --git a/lib/libpmc/pmc.tsc.3 b/lib/libpmc/pmc.tsc.3<br> index 4834d897f90c..73e2377df0c7 100644<br> --- a/lib/libpmc/pmc.tsc.3<br> +++ b/lib/libpmc/pmc.tsc.3<br> @@ -62,6 +62,7 @@ maps to the TSC.<br> .Xr pmc.core 3 ,<br> .Xr pmc.core2 3 ,<br> .Xr pmc.iaf 3 ,<br> +.Xr pmc.ibs 3 ,<br> .Xr pmc.soft 3 ,<br> .Xr pmclog 3 ,<br> .Xr hwpmc 4<br> diff --git a/lib/libpmc/pmc.ucf.3 b/lib/libpmc/pmc.ucf.3<br> index a7cea6bb57f9..37ee0f87a951 100644<br> --- a/lib/libpmc/pmc.ucf.3<br> +++ b/lib/libpmc/pmc.ucf.3<br> @@ -88,6 +88,7 @@ offset C0H under device number 0 and Function 0.<br> .Xr pmc.corei7 3 ,<br> .Xr pmc.corei7uc 3 ,<br> .Xr pmc.iaf 3 ,<br> +.Xr pmc.ibs 3 ,<br> .Xr pmc.soft 3 ,<br> .Xr pmc.tsc 3 ,<br> .Xr pmc.westmere 3 ,<br> diff --git a/sys/dev/hwpmc/hwpmc_ibs.h b/sys/dev/hwpmc/hwpmc_ibs.h<br> index 4449b44c8368..01fc88648558 100644<br> --- a/sys/dev/hwpmc/hwpmc_ibs.h<br> +++ b/sys/dev/hwpmc/hwpmc_ibs.h<br> @@ -67,6 +67,18 @@<br> #define IBS_CTL_LVTOFFSETVALID (1ULL << 8)<br> #define IBS_CTL_LVTOFFSETMASK 0x0000000F<br> <br> +/*<br> + * The minimum sampling rate was selected to match the default used by other<br> + * counters that was also found to be experimentally stable by providing enough<br> + * time between consecutive NMIs. The maximum sample rate is determined by<br> + * setting all available counter bits, i.e., all available bits except the<br> + * bottom four that are zero extended.<br> + */<br> +#define IBS_FETCH_MIN_RATE 65536<br> +#define IBS_FETCH_MAX_RATE 1048560<br> +#define IBS_OP_MIN_RATE 65536<br> +#define IBS_OP_MAX_RATE 134217712<br> +<br> /* IBS Fetch Control */<br> #define IBS_FETCH_CTL 0xC0011030 /* IBS Fetch Control */<br> #define IBS_FETCH_CTL_L3MISS (1ULL << 61) /* L3 Cache Miss */<br> @@ -82,7 +94,8 @@<br> #define IBS_FETCH_CTL_ENABLE (1ULL << 48) /* Enable */<br> #define IBS_FETCH_CTL_MAXCNTMASK 0x0000FFFFULL<br> <br> -#define IBS_FETCH_CTL_TO_LAT(_c) ((_c >> 32) & 0x0000FFFF)<br> +#define IBS_FETCH_INTERVAL_TO_CTL(_c) (((_c) >> 4) & 0x0000FFFF)<br> +#define IBS_FETCH_CTL_TO_LAT(_c) (((_c) >> 32) & 0x0000FFFF)<br> <br> #define IBS_FETCH_LINADDR 0xC0011031 /* Fetch Linear Address */<br> #define IBS_FETCH_PHYSADDR 0xC0011032 /* Fetch Physical Address */<br> @@ -95,12 +108,16 @@<br> <br> /* IBS Execution Control */<br> #define IBS_OP_CTL 0xC0011033 /* IBS Execution Control */<br> +#define IBS_OP_CTL_LATFLTEN (1ULL << 63) /* Load Latency Filtering */<br> #define IBS_OP_CTL_COUNTERCONTROL (1ULL << 19) /* Counter Control */<br> #define IBS_OP_CTL_VALID (1ULL << 18) /* Valid */<br> #define IBS_OP_CTL_ENABLE (1ULL << 17) /* Enable */<br> #define IBS_OP_CTL_L3MISSONLY (1ULL << 16) /* L3 Miss Filtering */<br> #define IBS_OP_CTL_MAXCNTMASK 0x0000FFFFULL<br> <br> +#define IBS_OP_CTL_LDLAT_TO_CTL(_c) ((((ldlat) >> 7) - 1) << 59)<br> +#define IBS_OP_INTERVAL_TO_CTL(_c) ((((_c) >> 4) & 0x0000FFFFULL) | ((_c) & 0x07F00000))<br> +<br> #define IBS_OP_RIP 0xC0011034 /* IBS Op RIP */<br> #define IBS_OP_DATA 0xC0011035 /* IBS Op Data */<br> #define IBS_OP_DATA_RIPINVALID (1ULL << 38) /* RIP Invalid */<br> <br> </blockquote>home | help
Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAPjTQNG=xAVKitXvuj7Su_yQhrRpHYjRgDW=k3uiSfyah01EdA>
