Date: Thu, 11 Jun 2026 15:12:36 +0000
From: Mitchell Horne <mhorne@FreeBSD.org>
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Cc: Andre Silva <andasilv@amd.com>
Subject: git: 0aa4c25f3e83 - main - hwpmc_ibs: Add Zen6 IBS ctl2 filters and alternate disable
Message-ID: <6a2ad064.272c3.4e5d56a@gitrepo.freebsd.org>
index | next in thread | raw e-mail
The branch main has been updated by mhorne: URL: https://cgit.FreeBSD.org/src/commit/?id=0aa4c25f3e836e98da419a37526bd51c9e04427b commit 0aa4c25f3e836e98da419a37526bd51c9e04427b Author: Andre Silva <andasilv@amd.com> AuthorDate: 2026-06-11 14:15:35 +0000 Commit: Mitchell Horne <mhorne@FreeBSD.org> CommitDate: 2026-06-11 15:12:30 +0000 hwpmc_ibs: Add Zen6 IBS ctl2 filters and alternate disable Add kernel and userland support for Zen6 IBS extensions per AMD pub 69205 (rev 1.00, March 2026): alternate fetch/op disable via ctl2[0], fetch latency filtering, virtual address bit 63 filtering, and streaming-store filtering. Decode the new IbsOpData2 StrmSt and RmtSocket bits. Update libpmc, pmcstat and manpage. Pre-Zen6 systems work unchanged with ibs_ctl2 == 0. Signed-off-by: Andre Silva <andasilv@amd.com> Reviewed by: Ali Mashtizadeh <ali@mashtizadeh.com>, mhorne Sponsored by: AMD Differential Revision: https://reviews.freebsd.org/D56914 --- lib/libpmc/libpmc.c | 55 ++++++++++- lib/libpmc/pmc.ibs.3 | 77 ++++++++++++++- sys/dev/hwpmc/hwpmc_ibs.c | 214 +++++++++++++++++++++++++++++++++++++---- sys/dev/hwpmc/hwpmc_ibs.h | 45 ++++++++- usr.sbin/pmcstat/pmcstat_log.c | 41 ++++++-- 5 files changed, 395 insertions(+), 37 deletions(-) diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c index 562000aef4e4..63228773216e 100644 --- a/lib/libpmc/libpmc.c +++ b/lib/libpmc/libpmc.c @@ -700,13 +700,14 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec, struct pmc_op_pmcallocate *pmc_config) { char *e, *p, *q; - uint64_t ctl, ldlat; + uint64_t ctl, ctl2, ldlat, fetchlat; u_int ibs_features; u_int regs[4]; pmc_config->pm_caps |= (PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_PRECISE); pmc_config->pm_md.pm_ibs.ibs_ctl = 0; + pmc_config->pm_md.pm_ibs.ibs_ctl2 = 0; /* setup parsing tables */ switch (pe) { @@ -735,6 +736,7 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec, /* parse parameters */ ctl = 0; + ctl2 = 0; if (pe == PMC_EV_IBS_FETCH) { while ((p = strsep(&ctrspec, ",")) != NULL) 
{ if (KWMATCH(p, "l3miss")) { @@ -744,6 +746,37 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec, ctl |= IBS_FETCH_CTL_L3MISSONLY; } else if (KWMATCH(p, "randomize")) { ctl |= IBS_FETCH_CTL_RANDOMIZE; + } else if (KWPREFIXMATCH(p, "fetchlat=")) { + if ((ibs_features & CPUID_IBSID_FETCHLATFILTERING) == 0) + return (-1); + + q = strchr(p, '='); + if (*++q == '\0') + return (-1); + + fetchlat = strtoull(q, &e, 0); + if (e == q || *e != '\0') + return (-1); + + if (fetchlat < IBS_FETCH_CTL2_LAT_MIN || + fetchlat > IBS_FETCH_CTL2_LAT_MAX) + return (-1); + if ((fetchlat % IBS_FETCH_CTL2_LAT_STEP) != 0) + return (-1); + + /* clear prior threshold */ + ctl2 &= ~IBS_FETCH_CTL2_LATFILTERMASK; + ctl2 |= IBS_FETCH_CTL2_LAT_TO_CTL(fetchlat); + } else if (KWMATCH(p, "usr")) { + if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0) + return (-1); + + pmc_config->pm_caps |= PMC_CAP_USER; + } else if (KWMATCH(p, "os")) { + if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0) + return (-1); + + pmc_config->pm_caps |= PMC_CAP_SYSTEM; } else { return (-1); } @@ -783,6 +816,9 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec, */ if (ldlat < 128 || ldlat > 2048) return (-1); + + /* clear prior ldlat threshold */ + ctl &= ~IBS_OP_CTL_LDLATTRSHMASK; ctl |= IBS_OP_CTL_LDLAT_TO_CTL(ldlat); ctl |= IBS_OP_CTL_L3MISSONLY | IBS_OP_CTL_LATFLTEN; } else if (KWMATCH(p, "opcount")) { @@ -790,6 +826,21 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec, return (-1); ctl |= IBS_OP_CTL_COUNTERCONTROL; + } else if (KWMATCH(p, "usr")) { + if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0) + return (-1); + + pmc_config->pm_caps |= PMC_CAP_USER; + } else if (KWMATCH(p, "os")) { + if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0) + return (-1); + + pmc_config->pm_caps |= PMC_CAP_SYSTEM; + } else if (KWMATCH(p, "streamstore")) { + if ((ibs_features & CPUID_IBSID_STRMSTANDRMTSOCKET) == 0) + return (-1); + + ctl2 |= IBS_OP_CTL2_STRMSTFILTER; } else { return (-1); } @@ -806,8 
+857,8 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec, ctl |= IBS_OP_INTERVAL_TO_CTL(pmc_config->pm_count); } - pmc_config->pm_md.pm_ibs.ibs_ctl |= ctl; + pmc_config->pm_md.pm_ibs.ibs_ctl2 |= ctl2; return (0); } diff --git a/lib/libpmc/pmc.ibs.3 b/lib/libpmc/pmc.ibs.3 index 574db4df6fa1..4891eb0afb22 100644 --- a/lib/libpmc/pmc.ibs.3 +++ b/lib/libpmc/pmc.ibs.3 @@ -89,7 +89,7 @@ AMD IBS supports the following capabilities. .It PMC_CAP_SYSTEM Ta Yes .It PMC_CAP_TAGGING Ta \&No .It PMC_CAP_THRESHOLD Ta \&No -.It PMC_CAP_USER Ta \&No +.It PMC_CAP_USER Ta Yes (Zen 6) .It PMC_CAP_WRITE Ta \&No .El .Pp @@ -97,25 +97,91 @@ By default AMD IBS enables the edge, interrupt, system and precise flags. .Ss Event Qualifiers Event specifiers for AMD IBS can have the following optional qualifiers: -.Bl -tag -width "ldlat=value" +.Bl -tag -width "fetchlat=value" +.It Li usr +Valid for both +.Ar ibs-fetch +and +.Ar ibs-op +events. +Configure the counter to only sample user-mode events. +Requires Zen 6 IBS extensions +.Pq CPUID Fn Fn8000_0001B +.Va EAX[IbsAddrBit63Filtering] , +and is rejected when the CPU does not advertise support. +.It Li os +Valid for both +.Ar ibs-fetch +and +.Ar ibs-op +events. +Configure the counter to only sample kernel-mode events. +Requires Zen 6 IBS extensions +.Pq CPUID Fn Fn8000_0001B +.Va EAX[IbsAddrBit63Filtering] , +and is rejected when the CPU does not advertise support. +.It Li fetchlat= Ns Ar value +Valid only for +.Ar ibs-fetch +events. +Configure the counter to only sample fetches whose latency is greater than or +equal to +.Ar value +core clock cycles. +The valid range is 128 to 1920 in steps of 128. +Requires Zen 6 IBS extensions +.Pq CPUID Fn Fn8000_0001B +.Va EAX[IbsFetchLatencyFiltering] , +and is rejected when the CPU does not advertise support. .It Li l3miss +Valid for both +.Ar ibs-fetch +and +.Ar ibs-op +events. Configure IBS to only sample if an l3miss occurred. .It Li ldlat= Ns Ar value +Valid only for +.Ar ibs-op +events. 
Configure the counter to only sample events with load latencies above .Ar ldlat . IBS only supports filtering latencies that are a multiple of 128 and between 128 and 2048. -Load latency filtering can only be used with ibs-op events and imply the -l3miss qualifier. +On pre-Zen 6 hardware this qualifier implies the +.Li l3miss +qualifier; on Zen 6 and later, latency-only filtering without +.Li l3miss +is permitted. .It Li opcount +Valid only for +.Ar ibs-op +events. Count ops rather than cycles. .It Li randomize +Valid only for +.Ar ibs-fetch +events. Randomize the sampling rate. +.It Li streamstore +Valid only for +.Ar ibs-op +events. +Configure the counter to only sample streaming +.Pq non-temporal +store operations. +Requires Zen 6 IBS extensions +.Pq CPUID Fn Fn8000_0001B +.Va EAX[IbsStrmStAndRmtSocket] , +and is rejected when the CPU does not advertise support. .El .Ss AMD IBS Events Specifiers The IBS event class provides only two event specifiers: .Bl -tag -width indent .It Li ibs-fetch Xo +.Op ,usr +.Op ,os +.Op ,fetchlat= Ns Ar value .Op ,l3miss .Op ,randomize .Xc @@ -124,9 +190,12 @@ The .Ar randomize qualifier randomly sets the bottom four bits of the sample rate. .It Li ibs-op Xo +.Op ,usr +.Op ,os .Op ,l3miss .Op ,ldlat= Ns Ar ldlat .Op ,opcount +.Op ,streamstore .Xc Collect performance samples during instruction execution. 
The diff --git a/sys/dev/hwpmc/hwpmc_ibs.c b/sys/dev/hwpmc/hwpmc_ibs.c index ae14f2ccb14c..16bc5edd5019 100644 --- a/sys/dev/hwpmc/hwpmc_ibs.c +++ b/sys/dev/hwpmc/hwpmc_ibs.c @@ -60,9 +60,15 @@ struct ibs_descr { static uint64_t ibs_features; static uint64_t ibs_fetch_allowed_mask; static uint64_t ibs_op_allowed_mask; +static uint64_t ibs_fetch_ctl2_allowed_mask; +static uint64_t ibs_op_ctl2_allowed_mask; +static bool ibs_fetch_ctl2_supported; +static bool ibs_op_ctl2_supported; static uint64_t ibs_fetch_extra_mask; +static uint64_t ibs_fetch_ctl2_extra_mask; static uint64_t ibs_op_extra_mask; +static uint64_t ibs_op_ctl2_extra_mask; SYSCTL_DECL(_kern_hwpmc); @@ -70,10 +76,18 @@ SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_fetch_extra_mask, CTLFLAG_RDTUN, &ibs_fetch_extra_mask, 0, "Extra allowed bits in the IBS fetch control MSR (override; default 0)"); +SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_fetch_ctl2_extra_mask, CTLFLAG_RDTUN, + &ibs_fetch_ctl2_extra_mask, 0, + "Extra allowed bits in the IBS fetch control 2 MSR (override; default 0)"); + SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_op_extra_mask, CTLFLAG_RDTUN, &ibs_op_extra_mask, 0, "Extra allowed bits in the IBS op control MSR (override; default 0)"); +SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_op_ctl2_extra_mask, CTLFLAG_RDTUN, + &ibs_op_ctl2_extra_mask, 0, + "Extra allowed bits in the IBS op control 2 MSR (override; default 0)"); + /* * Per-processor information */ @@ -92,8 +106,10 @@ ibs_init_policy(void) { ibs_fetch_allowed_mask = IBS_FETCH_ALLOWED_MASK_BASE; + ibs_fetch_ctl2_allowed_mask = 0; ibs_op_allowed_mask = IBS_OP_CTL_MAXCNTBASEMASK; + ibs_op_ctl2_allowed_mask = 0; if ((ibs_features & CPUID_IBSID_ZEN4IBSEXTENSIONS) != 0) ibs_fetch_allowed_mask |= IBS_FETCH_CTL_L3MISSONLY; @@ -106,6 +122,26 @@ ibs_init_policy(void) if ((ibs_features & CPUID_IBSID_ZEN4IBSEXTENSIONS) != 0) ibs_op_allowed_mask |= IBS_OP_CTL_L3MISSONLY; + + if ((ibs_features & CPUID_IBSID_FETCHLATFILTERING) != 0) + ibs_fetch_ctl2_allowed_mask |= 
IBS_FETCH_CTL2_LATFILTERMASK; + + if ((ibs_features & CPUID_IBSID_STRMSTANDRMTSOCKET) != 0) + ibs_op_ctl2_allowed_mask |= IBS_OP_CTL2_STRMSTFILTER; + + if ((ibs_features & CPUID_IBSID_IBSDIS) != 0) { + ibs_fetch_ctl2_supported = true; + ibs_op_ctl2_supported = true; + } + + /* + * ctl2 MSRs only exist on Zen 6; writing them on older silicon + * would #GP. + */ + if (!ibs_fetch_ctl2_supported) + ibs_fetch_ctl2_supported = (ibs_fetch_ctl2_allowed_mask != 0); + if (!ibs_op_ctl2_supported) + ibs_op_ctl2_supported = (ibs_op_ctl2_allowed_mask != 0); } static int @@ -128,7 +164,12 @@ ibs_validate_op_config(uint64_t config) if ((config & IBS_OP_CTL_LATFLTEN) != 0) { if ((ibs_features & CPUID_IBSID_IBSLOADLATENCYFILT) == 0) return (EINVAL); - if ((config & IBS_OP_CTL_L3MISSONLY) == 0) + /* + * Zen 6 decouples L3MISSONLY from load-latency filtering + * (AMD pub 69205); enforce the pairing only on older parts. + */ + if ((ibs_features & CPUID_IBSID_IBSDIS) == 0 && + (config & IBS_OP_CTL_L3MISSONLY) == 0) return (EINVAL); allowed_mask |= IBS_OP_CTL_LDLATMASK | IBS_OP_CTL_L3MISSONLY; @@ -143,16 +184,67 @@ ibs_validate_op_config(uint64_t config) } static int -ibs_validate_pmc_config(int ri, uint64_t config) +ibs_validate_fetch_ctl2_config(uint64_t config) { + uint64_t allowed_mask; + + if (config == 0) + return (0); + + if (!ibs_fetch_ctl2_supported) + return (EXTERROR(EINVAL, + "IBS fetch ctl2 features are not supported on this CPU")); + + allowed_mask = ibs_fetch_ctl2_allowed_mask | ibs_fetch_ctl2_extra_mask; + + if ((config & ~allowed_mask) != 0) + return (EXTERROR(EINVAL, + "IBS fetch ctl2 config 0x%jx has bits outside allowed" + " mask 0x%jx", (uint64_t)config, (uint64_t)allowed_mask)); + + return (0); +} + +static int +ibs_validate_op_ctl2_config(uint64_t config) +{ + uint64_t allowed_mask; + + if (config == 0) + return (0); + + if (!ibs_op_ctl2_supported) + return (EXTERROR(EINVAL, + "IBS op ctl2 features are not supported on this CPU")); + + allowed_mask = 
ibs_op_ctl2_allowed_mask | ibs_op_ctl2_extra_mask; + + if ((config & ~allowed_mask) != 0) + return (EXTERROR(EINVAL, + "IBS op ctl2 config 0x%jx has bits outside allowed mask" + " 0x%jx", (uint64_t)config, (uint64_t)allowed_mask)); + + return (0); +} + +static int +ibs_validate_pmc_config(int ri, uint64_t config, uint64_t config2) +{ + int error; switch (ri) { case IBS_PMC_FETCH: - return (ibs_validate_fetch_config(config)); + error = ibs_validate_fetch_config(config); + if (error != 0) + return (error); + return (ibs_validate_fetch_ctl2_config(config2)); case IBS_PMC_OP: - return (ibs_validate_op_config(config)); + error = ibs_validate_op_config(config); + if (error != 0) + return (error); + return (ibs_validate_op_ctl2_config(config2)); default: - return (EINVAL); + return (EXTERROR(EINVAL, "invalid IBS PMC index %d", ri)); } } @@ -266,7 +358,7 @@ static int ibs_allocate_pmc(int cpu __unused, int ri, struct pmc *pm, const struct pmc_op_pmcallocate *a) { - uint64_t caps, config; + uint64_t caps, config, config2; int error; KASSERT(ri >= 0 && ri < IBS_NPMCS, @@ -284,20 +376,53 @@ ibs_allocate_pmc(int cpu __unused, int ri, struct pmc *pm, PMCDBG2(MDP, ALL, 1, "ibs-allocate ri=%d caps=0x%x", ri, caps); - if ((caps & PMC_CAP_SYSTEM) == 0) - return (EXTERROR(EINVAL, "IBS requires SYSTEM capability")); + if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) != 0) { + if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0) + return (EXTERROR(EINVAL, + "IBS requires at least USER or SYSTEM capability")); + } else { + if ((caps & PMC_CAP_SYSTEM) == 0) + return (EXTERROR(EINVAL, + "IBS requires SYSTEM capability")); + if ((caps & PMC_CAP_USER) != 0) + return (EXTERROR(EINVAL, + "IBS USER filtering requires Zen 6 addr63 support")); + } if (!PMC_IS_SAMPLING_MODE(a->pm_mode)) return (EINVAL); config = a->pm_md.pm_ibs.ibs_ctl; - error = ibs_validate_pmc_config(ri, config); + config2 = a->pm_md.pm_ibs.ibs_ctl2; + error = ibs_validate_pmc_config(ri, config, config2); if (error != 0) 
return (error); pm->pm_md.pm_ibs.ibs_ctl = config; + pm->pm_md.pm_ibs.ibs_ctl2 = config2; + + if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) != 0) { + if ((caps & PMC_CAP_USER) != 0 && + (caps & PMC_CAP_SYSTEM) == 0) { + if (ri == IBS_PMC_FETCH) + pm->pm_md.pm_ibs.ibs_ctl2 |= + IBS_FETCH_CTL2_EXCLADDR63EQ1; + else + pm->pm_md.pm_ibs.ibs_ctl2 |= + IBS_OP_CTL2_EXCLRIP63EQ1; + } else if ((caps & PMC_CAP_SYSTEM) != 0 && + (caps & PMC_CAP_USER) == 0) { + if (ri == IBS_PMC_FETCH) + pm->pm_md.pm_ibs.ibs_ctl2 |= + IBS_FETCH_CTL2_EXCLADDR63EQ0; + else + pm->pm_md.pm_ibs.ibs_ctl2 |= + IBS_OP_CTL2_EXCLRIP63EQ0; + } + } - PMCDBG2(MDP, ALL, 2, "ibs-allocate ri=%d -> config=0x%jx", ri, - config); + PMCDBG3(MDP, ALL, 2, + "ibs-allocate ri=%d -> config=0x%jx config2=0x%jx", ri, + config, config2); return (0); } @@ -349,16 +474,24 @@ ibs_start_pmc(int cpu __diagused, int ri, struct pmc *pm) /* * Turn on the ENABLE bit. Zeroing out the control register eliminates - * stale valid bits from spurious NMIs and it resets the counter. + * stale valid bits from spurious NMIs and it resets the counter. This + * is safe here because the counter is not yet enabled; the NMI re-arm + * path must not do the same (Family 10h erratum #420). 
*/ switch (ri) { case IBS_PMC_FETCH: wrmsr(IBS_FETCH_CTL, 0); + if (ibs_fetch_ctl2_supported) + wrmsr(IBS_FETCH_CTL2, + pm->pm_md.pm_ibs.ibs_ctl2 & ~IBS_FETCH_CTL2_DISABLE); config = pm->pm_md.pm_ibs.ibs_ctl | IBS_FETCH_CTL_ENABLE; wrmsr(IBS_FETCH_CTL, config); break; case IBS_PMC_OP: wrmsr(IBS_OP_CTL, 0); + if (ibs_op_ctl2_supported) + wrmsr(IBS_OP_CTL2, + pm->pm_md.pm_ibs.ibs_ctl2 & ~IBS_OP_CTL2_DISABLE); config = pm->pm_md.pm_ibs.ibs_ctl | IBS_OP_CTL_ENABLE; wrmsr(IBS_OP_CTL, config); break; @@ -374,7 +507,8 @@ static int ibs_stop_pmc(int cpu __diagused, int ri, struct pmc *pm) { int i; - uint64_t config; + uint64_t config, config2; + bool use_alt_disable; KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), ("[ibs,%d] illegal CPU value %d", __LINE__, cpu)); @@ -394,23 +528,47 @@ ibs_stop_pmc(int cpu __diagused, int ri, struct pmc *pm) * are stopping and discard spurious NMIs. We then retry clearing the * control register for 50us. This gives us enough time and ensures * that the valid bit is not accidently stuck after a spurious NMI. + * + * On Zen 6 with the alternate disable bit (CPUID IbsDis), assert the + * ctl2 DISABLE bit first. This avoids an RMW hazard in ctl1 that the + * processor may update concurrently while sampling. 
*/ config = pm->pm_md.pm_ibs.ibs_ctl; + config2 = pm->pm_md.pm_ibs.ibs_ctl2; + use_alt_disable = (ibs_features & CPUID_IBSID_IBSDIS) != 0; atomic_store_int(&ibs_pcpu[cpu]->pc_status, IBS_CPU_STOPPING); + /* + * On Zen 6, ctl2 DISABLE is the authoritative stop switch; skip + * the legacy ctl1 RMW and clear it directly + */ switch (ri) { case IBS_PMC_FETCH: - wrmsr(IBS_FETCH_CTL, config & ~IBS_FETCH_CTL_MAXCNTMASK); - DELAY(1); - config &= ~IBS_FETCH_CTL_ENABLE; - wrmsr(IBS_FETCH_CTL, config); + if (use_alt_disable) { + wrmsr(IBS_FETCH_CTL2, + config2 | IBS_FETCH_CTL2_DISABLE); + wrmsr(IBS_FETCH_CTL, config & ~IBS_FETCH_CTL_ENABLE); + } else { + wrmsr(IBS_FETCH_CTL, + config & ~IBS_FETCH_CTL_MAXCNTMASK); + DELAY(1); + config &= ~IBS_FETCH_CTL_ENABLE; + wrmsr(IBS_FETCH_CTL, config); + } break; case IBS_PMC_OP: - wrmsr(IBS_OP_CTL, config & ~IBS_OP_CTL_MAXCNTMASK); - DELAY(1); - config &= ~IBS_OP_CTL_ENABLE; - wrmsr(IBS_OP_CTL, config); + if (use_alt_disable) { + wrmsr(IBS_OP_CTL2, + config2 | IBS_OP_CTL2_DISABLE); + wrmsr(IBS_OP_CTL, config & ~IBS_OP_CTL_ENABLE); + } else { + wrmsr(IBS_OP_CTL, + config & ~IBS_OP_CTL_MAXCNTMASK); + DELAY(1); + config &= ~IBS_OP_CTL_ENABLE; + wrmsr(IBS_OP_CTL, config); + } break; } @@ -420,9 +578,13 @@ ibs_stop_pmc(int cpu __diagused, int ri, struct pmc *pm) switch (ri) { case IBS_PMC_FETCH: wrmsr(IBS_FETCH_CTL, 0); + if (ibs_fetch_ctl2_supported) + wrmsr(IBS_FETCH_CTL2, 0); break; case IBS_PMC_OP: wrmsr(IBS_OP_CTL, 0); + if (ibs_op_ctl2_supported) + wrmsr(IBS_OP_CTL2, 0); break; } } @@ -456,6 +618,9 @@ pmc_ibs_process_fetch(struct pmc *pm, struct trapframe *tf, uint64_t config) mpd.pl_mpdata[PMC_MPIDX_FETCH_PHYSADDR] = rdmsr(IBS_FETCH_PHYSADDR); } + if (ibs_fetch_ctl2_supported) { + mpd.pl_mpdata[PMC_MPIDX_FETCH_CTL2] = rdmsr(IBS_FETCH_CTL2); + } pmc_process_interrupt_mp(PMC_HR, pm, tf, &mpd); @@ -490,6 +655,9 @@ pmc_ibs_process_op(struct pmc *pm, struct trapframe *tf, uint64_t config) if ((ibs_features & CPUID_IBSID_IBSOPDATA4) != 0) { 
mpd.pl_mpdata[PMC_MPIDX_OP_DATA4] = rdmsr(IBS_OP_DATA4); } + if (ibs_op_ctl2_supported) { + mpd.pl_mpdata[PMC_MPIDX_OP_CTL2] = rdmsr(IBS_OP_CTL2); + } pmc_process_interrupt_mp(PMC_HR, pm, tf, &mpd); @@ -647,6 +815,10 @@ ibs_pcpu_fini(struct pmc_mdep *md, int cpu) */ wrmsr(IBS_FETCH_CTL, 0); wrmsr(IBS_OP_CTL, 0); + if (ibs_fetch_ctl2_supported) + wrmsr(IBS_FETCH_CTL2, 0); + if (ibs_op_ctl2_supported) + wrmsr(IBS_OP_CTL2, 0); /* * Free up allocated space. diff --git a/sys/dev/hwpmc/hwpmc_ibs.h b/sys/dev/hwpmc/hwpmc_ibs.h index 433397954d4f..d1474b7cba32 100644 --- a/sys/dev/hwpmc/hwpmc_ibs.h +++ b/sys/dev/hwpmc/hwpmc_ibs.h @@ -35,6 +35,9 @@ /* * All of the CPUID definitions come from AMD PPR Vol 1 for AMD Family 1Ah * Model 02h C1 (57238) 2024-09-29 Revision 0.24. + * Zen 6 CPUID bits (IBSDIS, FETCHLATFILTERING, ADDRBIT63FILTERING) come from + * AMD64 Architecture Programmer's Manual Volume 2: System Programming (24593) + * 2025-07-02 Version 3.43. */ #define CPUID_IBSID 0x8000001B #define CPUID_IBSID_IBSFFV 0x00000001 /* IBS Feature Flags Valid */ @@ -50,6 +53,12 @@ #define CPUID_IBSID_IBSOPDATA4 0x00000400 /* IBS OP DATA4 */ #define CPUID_IBSID_ZEN4IBSEXTENSIONS 0x00000800 /* IBS Zen 4 Extensions */ #define CPUID_IBSID_IBSLOADLATENCYFILT 0x00001000 /* Load Latency Filtering */ +#define CPUID_IBSID_IBSDIS 0x00002000 /* Alternate IBS Disable */ +#define CPUID_IBSID_FETCHLATFILTERING 0x00004000 /* Fetch Latency Filter */ +#define CPUID_IBSID_ADDRBIT63FILTERING 0x00008000 /* Addr Bit 63 Filter */ +#define CPUID_IBSID_STRMSTANDRMTSOCKET 0x00010000 /* StrmSt + RmtSocket */ +#define CPUID_IBSID_BUFFERV1 0x00020000 /* IBS Buffering V1 */ +#define CPUID_IBSID_MEMPROFILERV1 0x00040000 /* IBS Memory Profiler V1 */ #define CPUID_IBSID_IBSUPDTDDTLBSTATS 0x00080000 /* Simplified DTLB Stats */ /* @@ -107,11 +116,27 @@ #define IBS_FETCH_PHYSADDR 0xC0011032 /* Fetch Physical Address */ #define IBS_FETCH_EXTCTL 0xC001103C /* Fetch Control Extended */ +/* IBS Fetch Control 2 (Zen 6) 
*/ +#define IBS_FETCH_CTL2 0xC001103F /* IBS Fetch Control 2 */ +#define IBS_FETCH_CTL2_DISABLE (1ULL << 0) /* IBS Fetch Disable */ +#define IBS_FETCH_CTL2_LATFILTERMASK (0xFULL << 1) /* Fetch Latency Filter */ +#define IBS_FETCH_CTL2_EXCLADDR63EQ1 (1ULL << 5) /* Exclude addr bit63=1 */ +#define IBS_FETCH_CTL2_EXCLADDR63EQ0 (1ULL << 6) /* Exclude addr bit63=0 */ +#define IBS_FETCH_CTL2_ADDR63MASK (IBS_FETCH_CTL2_EXCLADDR63EQ0 | \ + IBS_FETCH_CTL2_EXCLADDR63EQ1) + +#define IBS_FETCH_CTL2_LAT_MIN 128 +#define IBS_FETCH_CTL2_LAT_MAX 1920 +#define IBS_FETCH_CTL2_LAT_STEP 128 +#define IBS_FETCH_CTL2_LAT_TO_CTL(_l) ((((_l) >> 7) & 0xFULL) << 1) +#define IBS_FETCH_CTL2_CTL_TO_LAT(_c) ((((_c) >> 1) & 0xFULL) << 7) + #define PMC_MPIDX_FETCH_CTL 0 #define PMC_MPIDX_FETCH_EXTCTL 1 #define PMC_MPIDX_FETCH_LINADDR 2 #define PMC_MPIDX_FETCH_PHYSADDR 3 -#define PMC_MPIDX_FETCH_MAX (PMC_MPIDX_FETCH_PHYSADDR + 1) +#define PMC_MPIDX_FETCH_CTL2 4 +#define PMC_MPIDX_FETCH_MAX (PMC_MPIDX_FETCH_CTL2 + 1) /* IBS Execution Control */ #define IBS_OP_CTL 0xC0011033 /* IBS Execution Control */ @@ -148,6 +173,8 @@ #define IBS_OP_DATA_RETURN (1ULL << 34) /* Return */ #define IBS_OP_DATA2 0xC0011036 /* IBS Op Data 2 */ +#define IBS_OP_DATA2_RMTSOCKET (1ULL << 9) /* Remote Socket */ +#define IBS_OP_DATA2_STRMST (1ULL << 8) /* Streaming Store */ #define IBS_OP_DATA3 0xC0011037 /* IBS Op Data 3 */ #define IBS_OP_DATA3_DCPHYADDRVALID (1ULL << 18) /* DC Physical Address */ #define IBS_OP_DATA3_DCLINADDRVALID (1ULL << 17) /* DC Linear Address */ @@ -169,6 +196,15 @@ #define IBS_OP_DATA4 0xC001103D /* IBS Op Data 4 */ #define IBS_OP_DATA4_LDRESYNC (1ULL << 0) /* Load Resync */ +/* IBS Execution Control 2 (Zen 6) */ +#define IBS_OP_CTL2 0xC001103E /* IBS Execution Control 2 */ +#define IBS_OP_CTL2_DISABLE (1ULL << 0) /* IBS Execution Disable */ +#define IBS_OP_CTL2_EXCLRIP63EQ0 (1ULL << 1) /* Exclude RIP bit63=0 */ +#define IBS_OP_CTL2_EXCLRIP63EQ1 (1ULL << 2) /* Exclude RIP bit63=1 */ +#define 
IBS_OP_CTL2_STRMSTFILTER (1ULL << 3) /* Streaming Store Filter */ +#define IBS_OP_CTL2_RIP63MASK (IBS_OP_CTL2_EXCLRIP63EQ0 | \ + IBS_OP_CTL2_EXCLRIP63EQ1) + #define PMC_MPIDX_OP_CTL 0 #define PMC_MPIDX_OP_RIP 1 #define PMC_MPIDX_OP_DATA 2 @@ -178,7 +214,8 @@ #define PMC_MPIDX_OP_DC_PHYSADDR 6 #define PMC_MPIDX_OP_TGT_RIP 7 #define PMC_MPIDX_OP_DATA4 8 -#define PMC_MPIDX_OP_MAX (PMC_MPIDX_OP_DATA4 + 1) +#define PMC_MPIDX_OP_CTL2 9 +#define PMC_MPIDX_OP_MAX (PMC_MPIDX_OP_CTL2 + 1) /* * IBS data is encoded as using the multipart flag in the existing callchain @@ -204,8 +241,8 @@ struct pmc_md_ibs_pmc { uint64_t ibs_ctl2; }; -#define IBS_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_SYSTEM | \ - PMC_CAP_EDGE | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE) +#define IBS_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | \ + PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE) int pmc_ibs_initialize(struct pmc_mdep *md, int ncpu); void pmc_ibs_finalize(struct pmc_mdep *md); diff --git a/usr.sbin/pmcstat/pmcstat_log.c b/usr.sbin/pmcstat/pmcstat_log.c index 9af2ccc4e365..77a6ea799594 100644 --- a/usr.sbin/pmcstat/pmcstat_log.c +++ b/usr.sbin/pmcstat/pmcstat_log.c @@ -371,10 +371,10 @@ pmcstat_pmcindex_to_pmcr(int pmcin) #if defined(__amd64__) || defined(__i386__) static void -pmcstat_print_ibs_fetch(struct pmclog_ev_callchain *cc, int offset) +pmcstat_print_ibs_fetch(struct pmclog_ev_callchain *cc, int offset, int len64) { uint64_t *ibsbuf = (uint64_t *)&cc->pl_pc[offset]; - uint64_t ctl; + uint64_t ctl, ctl2; ctl = ibsbuf[PMC_MPIDX_FETCH_CTL]; PMCSTAT_PRINT_ENTRY("ibs-fetch", "%s%s%s%s", @@ -390,15 +390,28 @@ pmcstat_print_ibs_fetch(struct pmclog_ev_callchain *cc, int offset) PMCSTAT_PRINT_ENTRY("IBS", "Physical Address %" PRIx64, ibsbuf[PMC_MPIDX_FETCH_PHYSADDR]); } + if (len64 > PMC_MPIDX_FETCH_CTL2) { + ctl2 = ibsbuf[PMC_MPIDX_FETCH_CTL2]; + if ((ctl2 & IBS_FETCH_CTL2_EXCLADDR63EQ1) != 0) + PMCSTAT_PRINT_ENTRY("ibs-fetch", "addr63=0"); + if ((ctl2 & 
IBS_FETCH_CTL2_EXCLADDR63EQ0) != 0) + PMCSTAT_PRINT_ENTRY("ibs-fetch", "addr63=1"); + if ((ctl2 & IBS_FETCH_CTL2_LATFILTERMASK) != 0) { + PMCSTAT_PRINT_ENTRY("ibs-fetch", + "fetchlat>=%" PRIu64, + (uint64_t)IBS_FETCH_CTL2_CTL_TO_LAT(ctl2)); + } + } } static void -pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset) +pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset, int len64) { uint64_t *ibsbuf = (uint64_t *)&cc->pl_pc[offset]; - uint64_t data, data3; + uint64_t data, data2, data3, ctl2; data = ibsbuf[PMC_MPIDX_OP_DATA]; + data2 = ibsbuf[PMC_MPIDX_OP_DATA2]; data3 = ibsbuf[PMC_MPIDX_OP_DATA3]; if ((data & IBS_OP_DATA_RIPINVALID) == 0) { @@ -416,6 +429,11 @@ pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset) (data3 & IBS_OP_DATA3_LOCKEDOP) ? "lock " : "", (data3 & IBS_OP_DATA3_DCL1TLBMISS) ? "l1tlbmiss " : "", (data3 & IBS_OP_DATA3_DCMISS) ? "dcmiss " : ""); + if ((data2 & (IBS_OP_DATA2_STRMST | IBS_OP_DATA2_RMTSOCKET)) != 0) { + PMCSTAT_PRINT_ENTRY("ibs-op", "%s%s", + (data2 & IBS_OP_DATA2_STRMST) ? "streamstore " : "", + (data2 & IBS_OP_DATA2_RMTSOCKET) ? 
"remotesocket" : ""); + } PMCSTAT_PRINT_ENTRY("ibs-op", "Latency %" PRIu64, IBS_OP_DATA3_TO_DCLAT(data3)); if ((data3 & IBS_OP_DATA3_DCLINADDRVALID) != 0) { @@ -426,6 +444,15 @@ pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset) PMCSTAT_PRINT_ENTRY("ibs-op", "Physical Address %" PRIx64, ibsbuf[PMC_MPIDX_OP_DC_PHYSADDR]); } + if (len64 > PMC_MPIDX_OP_CTL2) { + ctl2 = ibsbuf[PMC_MPIDX_OP_CTL2]; + if ((ctl2 & IBS_OP_CTL2_EXCLRIP63EQ1) != 0) + PMCSTAT_PRINT_ENTRY("ibs-op", "addr63=0"); + if ((ctl2 & IBS_OP_CTL2_EXCLRIP63EQ0) != 0) + PMCSTAT_PRINT_ENTRY("ibs-op", "addr63=1"); + if ((ctl2 & IBS_OP_CTL2_STRMSTFILTER) != 0) + PMCSTAT_PRINT_ENTRY("ibs-op", "streamstore"); + } } #endif @@ -446,9 +473,11 @@ pmcstat_print_multipart(struct pmclog_ev_callchain *cc) return (offset); #if defined(__amd64__) || defined(__i386__) } else if (type == PMC_CC_MULTIPART_IBS_FETCH) { - pmcstat_print_ibs_fetch(cc, offset); + pmcstat_print_ibs_fetch(cc, offset, + len / (sizeof(uint64_t) / sizeof(uintptr_t))); } else if (type == PMC_CC_MULTIPART_IBS_OP) { - pmcstat_print_ibs_op(cc, offset); + pmcstat_print_ibs_op(cc, offset, + len / (sizeof(uint64_t) / sizeof(uintptr_t))); #endif } else { PMCSTAT_PRINT_ENTRY("unsupported multipart type!");
home | help
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?6a2ad064.272c3.4e5d56a>
