Skip site navigation (1) Skip section navigation (2)
Date:      Thu, 11 Jun 2026 15:12:36 +0000
From:      Mitchell Horne <mhorne@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Cc:        Andre Silva <andasilv@amd.com>
Subject:   git: 0aa4c25f3e83 - main - hwpmc_ibs: Add Zen6 IBS ctl2 filters and alternate disable
Message-ID:  <6a2ad064.272c3.4e5d56a@gitrepo.freebsd.org>

index | next in thread | raw e-mail

The branch main has been updated by mhorne:

URL: https://cgit.FreeBSD.org/src/commit/?id=0aa4c25f3e836e98da419a37526bd51c9e04427b

commit 0aa4c25f3e836e98da419a37526bd51c9e04427b
Author:     Andre Silva <andasilv@amd.com>
AuthorDate: 2026-06-11 14:15:35 +0000
Commit:     Mitchell Horne <mhorne@FreeBSD.org>
CommitDate: 2026-06-11 15:12:30 +0000

    hwpmc_ibs: Add Zen6 IBS ctl2 filters and alternate disable
    
    Add kernel and userland support for Zen6 IBS extensions per AMD pub
    69205 (rev 1.00, March 2026): alternate fetch/op disable via ctl2[0],
    fetch latency filtering, virtual address bit 63 filtering, and
    streaming-store filtering.  Decode the new IbsOpData2 StrmSt and
    RmtSocket bits. Update libpmc, pmcstat and manpage.
    
    Pre-Zen6 systems work unchanged with ibs_ctl2 == 0.
    
    Signed-off-by:  Andre Silva <andasilv@amd.com>
    Reviewed by:    Ali Mashtizadeh <ali@mashtizadeh.com>, mhorne
    Sponsored by:   AMD
    Differential Revision:  https://reviews.freebsd.org/D56914
---
 lib/libpmc/libpmc.c            |  55 ++++++++++-
 lib/libpmc/pmc.ibs.3           |  77 ++++++++++++++-
 sys/dev/hwpmc/hwpmc_ibs.c      | 214 +++++++++++++++++++++++++++++++++++++----
 sys/dev/hwpmc/hwpmc_ibs.h      |  45 ++++++++-
 usr.sbin/pmcstat/pmcstat_log.c |  41 ++++++--
 5 files changed, 395 insertions(+), 37 deletions(-)

diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c
index 562000aef4e4..63228773216e 100644
--- a/lib/libpmc/libpmc.c
+++ b/lib/libpmc/libpmc.c
@@ -700,13 +700,14 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
     struct pmc_op_pmcallocate *pmc_config)
 {
 	char *e, *p, *q;
-	uint64_t ctl, ldlat;
+	uint64_t ctl, ctl2, ldlat, fetchlat;
 	u_int ibs_features;
 	u_int regs[4];
 
 	pmc_config->pm_caps |=
 	    (PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_PRECISE);
 	pmc_config->pm_md.pm_ibs.ibs_ctl = 0;
+	pmc_config->pm_md.pm_ibs.ibs_ctl2 = 0;
 
 	/* setup parsing tables */
 	switch (pe) {
@@ -735,6 +736,7 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
 
 	/* parse parameters */
 	ctl = 0;
+	ctl2 = 0;
 	if (pe == PMC_EV_IBS_FETCH) {
 		while ((p = strsep(&ctrspec, ",")) != NULL) {
 			if (KWMATCH(p, "l3miss")) {
@@ -744,6 +746,37 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
 				ctl |= IBS_FETCH_CTL_L3MISSONLY;
 			} else if (KWMATCH(p, "randomize")) {
 				ctl |= IBS_FETCH_CTL_RANDOMIZE;
+			} else if (KWPREFIXMATCH(p, "fetchlat=")) {
+				if ((ibs_features & CPUID_IBSID_FETCHLATFILTERING) == 0)
+					return (-1);
+
+				q = strchr(p, '=');
+				if (*++q == '\0')
+					return (-1);
+
+				fetchlat = strtoull(q, &e, 0);
+				if (e == q || *e != '\0')
+					return (-1);
+
+				if (fetchlat < IBS_FETCH_CTL2_LAT_MIN ||
+				    fetchlat > IBS_FETCH_CTL2_LAT_MAX)
+					return (-1);
+				if ((fetchlat % IBS_FETCH_CTL2_LAT_STEP) != 0)
+					return (-1);
+
+				/* clear prior threshold */
+				ctl2 &= ~IBS_FETCH_CTL2_LATFILTERMASK;
+				ctl2 |= IBS_FETCH_CTL2_LAT_TO_CTL(fetchlat);
+			} else if (KWMATCH(p, "usr")) {
+				if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0)
+					return (-1);
+
+				pmc_config->pm_caps |= PMC_CAP_USER;
+			} else if (KWMATCH(p, "os")) {
+				if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0)
+					return (-1);
+
+				pmc_config->pm_caps |= PMC_CAP_SYSTEM;
 			} else {
 				return (-1);
 			}
@@ -783,6 +816,9 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
 				 */
 				if (ldlat < 128 || ldlat > 2048)
 					return (-1);
+
+				/* clear prior ldlat threshold */
+				ctl &= ~IBS_OP_CTL_LDLATTRSHMASK;
 				ctl |= IBS_OP_CTL_LDLAT_TO_CTL(ldlat);
 				ctl |= IBS_OP_CTL_L3MISSONLY | IBS_OP_CTL_LATFLTEN;
 			} else if (KWMATCH(p, "opcount")) {
@@ -790,6 +826,21 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
 					return (-1);
 
 				ctl |= IBS_OP_CTL_COUNTERCONTROL;
+			} else if (KWMATCH(p, "usr")) {
+				if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0)
+					return (-1);
+
+				pmc_config->pm_caps |= PMC_CAP_USER;
+			} else if (KWMATCH(p, "os")) {
+				if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) == 0)
+					return (-1);
+
+				pmc_config->pm_caps |= PMC_CAP_SYSTEM;
+			} else if (KWMATCH(p, "streamstore")) {
+				if ((ibs_features & CPUID_IBSID_STRMSTANDRMTSOCKET) == 0)
+					return (-1);
+
+				ctl2 |= IBS_OP_CTL2_STRMSTFILTER;
 			} else {
 				return (-1);
 			}
@@ -806,8 +857,8 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
 		ctl |= IBS_OP_INTERVAL_TO_CTL(pmc_config->pm_count);
 	}
 
-
 	pmc_config->pm_md.pm_ibs.ibs_ctl |= ctl;
+	pmc_config->pm_md.pm_ibs.ibs_ctl2 |= ctl2;
 
 	return (0);
 }
diff --git a/lib/libpmc/pmc.ibs.3 b/lib/libpmc/pmc.ibs.3
index 574db4df6fa1..4891eb0afb22 100644
--- a/lib/libpmc/pmc.ibs.3
+++ b/lib/libpmc/pmc.ibs.3
@@ -89,7 +89,7 @@ AMD IBS supports the following capabilities.
 .It PMC_CAP_SYSTEM Ta Yes
 .It PMC_CAP_TAGGING Ta \&No
 .It PMC_CAP_THRESHOLD Ta \&No
-.It PMC_CAP_USER Ta \&No
+.It PMC_CAP_USER Ta Yes (Zen 6)
 .It PMC_CAP_WRITE Ta \&No
 .El
 .Pp
@@ -97,25 +97,91 @@ By default AMD IBS enables the edge, interrupt, system and precise flags.
 .Ss Event Qualifiers
 Event specifiers for AMD IBS can have the following optional
 qualifiers:
-.Bl -tag -width "ldlat=value"
+.Bl -tag -width "fetchlat=value"
+.It Li usr
+Valid for both
+.Ar ibs-fetch
+and
+.Ar ibs-op
+events.
+Configure the counter to only sample user-mode events.
+Requires Zen 6 IBS extensions
+.Pq CPUID Fn Fn8000_001B
+.Va EAX[IbsAddrBit63Filtering] ,
+and is rejected when the CPU does not advertise support.
+.It Li os
+Valid for both
+.Ar ibs-fetch
+and
+.Ar ibs-op
+events.
+Configure the counter to only sample kernel-mode events.
+Requires Zen 6 IBS extensions
+.Pq CPUID Fn Fn8000_001B
+.Va EAX[IbsAddrBit63Filtering] ,
+and is rejected when the CPU does not advertise support.
+.It Li fetchlat= Ns Ar value
+Valid only for
+.Ar ibs-fetch
+events.
+Configure the counter to only sample fetches whose latency is greater than or
+equal to
+.Ar value
+core clock cycles.
+The valid range is 128 to 1920 in steps of 128.
+Requires Zen 6 IBS extensions
+.Pq CPUID Fn Fn8000_001B
+.Va EAX[IbsFetchLatencyFiltering] ,
+and is rejected when the CPU does not advertise support.
 .It Li l3miss
+Valid for both
+.Ar ibs-fetch
+and
+.Ar ibs-op
+events.
 Configure IBS to only sample if an l3miss occurred.
 .It Li ldlat= Ns Ar value
+Valid only for
+.Ar ibs-op
+events.
 Configure the counter to only sample events with load latencies above
 .Ar ldlat .
 IBS only supports filtering latencies that are a multiple of 128 and between
 128 and 2048.
-Load latency filtering can only be used with ibs-op events and imply the
-l3miss qualifier.
+On pre-Zen 6 hardware this qualifier implies the
+.Li l3miss
+qualifier; on Zen 6 and later, latency-only filtering without
+.Li l3miss
+is permitted.
 .It Li opcount
+Valid only for
+.Ar ibs-op
+events.
 Count ops rather than cycles.
 .It Li randomize
+Valid only for
+.Ar ibs-fetch
+events.
 Randomize the sampling rate.
+.It Li streamstore
+Valid only for
+.Ar ibs-op
+events.
+Configure the counter to only sample streaming
+.Pq non-temporal
+store operations.
+Requires Zen 6 IBS extensions
+.Pq CPUID Fn Fn8000_001B
+.Va EAX[IbsStrmStAndRmtSocket] ,
+and is rejected when the CPU does not advertise support.
 .El
 .Ss AMD IBS Events Specifiers
 The IBS event class provides only two event specifiers:
 .Bl -tag -width indent
 .It Li ibs-fetch Xo
+.Op ,usr
+.Op ,os
+.Op ,fetchlat= Ns Ar value
 .Op ,l3miss
 .Op ,randomize
 .Xc
@@ -124,9 +190,12 @@ The
 .Ar randomize
 qualifier randomly sets the bottom four bits of the sample rate.
 .It Li ibs-op Xo
+.Op ,usr
+.Op ,os
 .Op ,l3miss
 .Op ,ldlat= Ns Ar ldlat
 .Op ,opcount
+.Op ,streamstore
 .Xc
 Collect performance samples during instruction execution.
 The
diff --git a/sys/dev/hwpmc/hwpmc_ibs.c b/sys/dev/hwpmc/hwpmc_ibs.c
index ae14f2ccb14c..16bc5edd5019 100644
--- a/sys/dev/hwpmc/hwpmc_ibs.c
+++ b/sys/dev/hwpmc/hwpmc_ibs.c
@@ -60,9 +60,15 @@ struct ibs_descr {
 static uint64_t ibs_features;
 static uint64_t ibs_fetch_allowed_mask;
 static uint64_t ibs_op_allowed_mask;
+static uint64_t ibs_fetch_ctl2_allowed_mask;
+static uint64_t ibs_op_ctl2_allowed_mask;
+static bool ibs_fetch_ctl2_supported;
+static bool ibs_op_ctl2_supported;
 
 static uint64_t ibs_fetch_extra_mask;
+static uint64_t ibs_fetch_ctl2_extra_mask;
 static uint64_t ibs_op_extra_mask;
+static uint64_t ibs_op_ctl2_extra_mask;
 
 SYSCTL_DECL(_kern_hwpmc);
 
@@ -70,10 +76,18 @@ SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_fetch_extra_mask, CTLFLAG_RDTUN,
     &ibs_fetch_extra_mask, 0,
     "Extra allowed bits in the IBS fetch control MSR (override; default 0)");
 
+SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_fetch_ctl2_extra_mask, CTLFLAG_RDTUN,
+    &ibs_fetch_ctl2_extra_mask, 0,
+    "Extra allowed bits in the IBS fetch control 2 MSR (override; default 0)");
+
 SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_op_extra_mask, CTLFLAG_RDTUN,
     &ibs_op_extra_mask, 0,
     "Extra allowed bits in the IBS op control MSR (override; default 0)");
 
+SYSCTL_U64(_kern_hwpmc, OID_AUTO, ibs_op_ctl2_extra_mask, CTLFLAG_RDTUN,
+    &ibs_op_ctl2_extra_mask, 0,
+    "Extra allowed bits in the IBS op control 2 MSR (override; default 0)");
+
 /*
  * Per-processor information
  */
@@ -92,8 +106,10 @@ ibs_init_policy(void)
 {
 
 	ibs_fetch_allowed_mask = IBS_FETCH_ALLOWED_MASK_BASE;
+	ibs_fetch_ctl2_allowed_mask = 0;
 
 	ibs_op_allowed_mask = IBS_OP_CTL_MAXCNTBASEMASK;
+	ibs_op_ctl2_allowed_mask = 0;
 
 	if ((ibs_features & CPUID_IBSID_ZEN4IBSEXTENSIONS) != 0)
 		ibs_fetch_allowed_mask |= IBS_FETCH_CTL_L3MISSONLY;
@@ -106,6 +122,26 @@ ibs_init_policy(void)
 
 	if ((ibs_features & CPUID_IBSID_ZEN4IBSEXTENSIONS) != 0)
 		ibs_op_allowed_mask |= IBS_OP_CTL_L3MISSONLY;
+
+	if ((ibs_features & CPUID_IBSID_FETCHLATFILTERING) != 0)
+		ibs_fetch_ctl2_allowed_mask |= IBS_FETCH_CTL2_LATFILTERMASK;
+
+	if ((ibs_features & CPUID_IBSID_STRMSTANDRMTSOCKET) != 0)
+		ibs_op_ctl2_allowed_mask |= IBS_OP_CTL2_STRMSTFILTER;
+
+	if ((ibs_features & CPUID_IBSID_IBSDIS) != 0) {
+		ibs_fetch_ctl2_supported = true;
+		ibs_op_ctl2_supported = true;
+	}
+
+	/*
+	 * ctl2 MSRs only exist on Zen 6; writing them on older silicon
+	 * would #GP.
+	 */
+	if (!ibs_fetch_ctl2_supported)
+		ibs_fetch_ctl2_supported = (ibs_fetch_ctl2_allowed_mask != 0);
+	if (!ibs_op_ctl2_supported)
+		ibs_op_ctl2_supported = (ibs_op_ctl2_allowed_mask != 0);
 }
 
 static int
@@ -128,7 +164,12 @@ ibs_validate_op_config(uint64_t config)
 	if ((config & IBS_OP_CTL_LATFLTEN) != 0) {
 		if ((ibs_features & CPUID_IBSID_IBSLOADLATENCYFILT) == 0)
 			return (EINVAL);
-		if ((config & IBS_OP_CTL_L3MISSONLY) == 0)
+		/*
+		 * Zen 6 decouples L3MISSONLY from load-latency filtering
+		 * (AMD pub 69205); enforce the pairing only on older parts.
+		 */
+		if ((ibs_features & CPUID_IBSID_IBSDIS) == 0 &&
+		    (config & IBS_OP_CTL_L3MISSONLY) == 0)
 			return (EINVAL);
 
 		allowed_mask |= IBS_OP_CTL_LDLATMASK | IBS_OP_CTL_L3MISSONLY;
@@ -143,16 +184,67 @@ ibs_validate_op_config(uint64_t config)
 }
 
 static int
-ibs_validate_pmc_config(int ri, uint64_t config)
+ibs_validate_fetch_ctl2_config(uint64_t config)
 {
+	uint64_t allowed_mask;
+
+	if (config == 0)
+		return (0);
+
+	if (!ibs_fetch_ctl2_supported)
+		return (EXTERROR(EINVAL,
+		    "IBS fetch ctl2 features are not supported on this CPU"));
+
+	allowed_mask = ibs_fetch_ctl2_allowed_mask | ibs_fetch_ctl2_extra_mask;
+
+	if ((config & ~allowed_mask) != 0)
+		return (EXTERROR(EINVAL,
+		    "IBS fetch ctl2 config 0x%jx has bits outside allowed"
+		    " mask 0x%jx", (uint64_t)config, (uint64_t)allowed_mask));
+
+	return (0);
+}
+
+static int
+ibs_validate_op_ctl2_config(uint64_t config)
+{
+	uint64_t allowed_mask;
+
+	if (config == 0)
+		return (0);
+
+	if (!ibs_op_ctl2_supported)
+		return (EXTERROR(EINVAL,
+		    "IBS op ctl2 features are not supported on this CPU"));
+
+	allowed_mask = ibs_op_ctl2_allowed_mask | ibs_op_ctl2_extra_mask;
+
+	if ((config & ~allowed_mask) != 0)
+		return (EXTERROR(EINVAL,
+		    "IBS op ctl2 config 0x%jx has bits outside allowed mask"
+		    " 0x%jx", (uint64_t)config, (uint64_t)allowed_mask));
+
+	return (0);
+}
+
+static int
+ibs_validate_pmc_config(int ri, uint64_t config, uint64_t config2)
+{
+	int error;
 
 	switch (ri) {
 	case IBS_PMC_FETCH:
-		return (ibs_validate_fetch_config(config));
+		error = ibs_validate_fetch_config(config);
+		if (error != 0)
+			return (error);
+		return (ibs_validate_fetch_ctl2_config(config2));
 	case IBS_PMC_OP:
-		return (ibs_validate_op_config(config));
+		error = ibs_validate_op_config(config);
+		if (error != 0)
+			return (error);
+		return (ibs_validate_op_ctl2_config(config2));
 	default:
-		return (EINVAL);
+		return (EXTERROR(EINVAL, "invalid IBS PMC index %d", ri));
 	}
 }
 
@@ -266,7 +358,7 @@ static int
 ibs_allocate_pmc(int cpu __unused, int ri, struct pmc *pm,
     const struct pmc_op_pmcallocate *a)
 {
-	uint64_t caps, config;
+	uint64_t caps, config, config2;
 	int error;
 
 	KASSERT(ri >= 0 && ri < IBS_NPMCS,
@@ -284,20 +376,53 @@ ibs_allocate_pmc(int cpu __unused, int ri, struct pmc *pm,
 
 	PMCDBG2(MDP, ALL, 1, "ibs-allocate ri=%d caps=0x%x", ri, caps);
 
-	if ((caps & PMC_CAP_SYSTEM) == 0)
-		return (EXTERROR(EINVAL, "IBS requires SYSTEM capability"));
+	if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) != 0) {
+		if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0)
+			return (EXTERROR(EINVAL,
+			    "IBS requires at least USER or SYSTEM capability"));
+	} else {
+		if ((caps & PMC_CAP_SYSTEM) == 0)
+			return (EXTERROR(EINVAL,
+			    "IBS requires SYSTEM capability"));
+		if ((caps & PMC_CAP_USER) != 0)
+			return (EXTERROR(EINVAL,
+			    "IBS USER filtering requires Zen 6 addr63 support"));
+	}
 
 	if (!PMC_IS_SAMPLING_MODE(a->pm_mode))
 		return (EINVAL);
 
 	config = a->pm_md.pm_ibs.ibs_ctl;
-	error = ibs_validate_pmc_config(ri, config);
+	config2 = a->pm_md.pm_ibs.ibs_ctl2;
+	error = ibs_validate_pmc_config(ri, config, config2);
 	if (error != 0)
 		return (error);
 	pm->pm_md.pm_ibs.ibs_ctl = config;
+	pm->pm_md.pm_ibs.ibs_ctl2 = config2;
+
+	if ((ibs_features & CPUID_IBSID_ADDRBIT63FILTERING) != 0) {
+		if ((caps & PMC_CAP_USER) != 0 &&
+		    (caps & PMC_CAP_SYSTEM) == 0) {
+			if (ri == IBS_PMC_FETCH)
+				pm->pm_md.pm_ibs.ibs_ctl2 |=
+				    IBS_FETCH_CTL2_EXCLADDR63EQ1;
+			else
+				pm->pm_md.pm_ibs.ibs_ctl2 |=
+				    IBS_OP_CTL2_EXCLRIP63EQ1;
+		} else if ((caps & PMC_CAP_SYSTEM) != 0 &&
+		    (caps & PMC_CAP_USER) == 0) {
+			if (ri == IBS_PMC_FETCH)
+				pm->pm_md.pm_ibs.ibs_ctl2 |=
+				    IBS_FETCH_CTL2_EXCLADDR63EQ0;
+			else
+				pm->pm_md.pm_ibs.ibs_ctl2 |=
+				    IBS_OP_CTL2_EXCLRIP63EQ0;
+		}
+	}
 
-	PMCDBG2(MDP, ALL, 2, "ibs-allocate ri=%d -> config=0x%jx", ri,
-	    config);
+	PMCDBG3(MDP, ALL, 2,
+	    "ibs-allocate ri=%d -> config=0x%jx config2=0x%jx", ri,
+	    config, config2);
 
 	return (0);
 }
@@ -349,16 +474,24 @@ ibs_start_pmc(int cpu __diagused, int ri, struct pmc *pm)
 
 	/*
 	 * Turn on the ENABLE bit.  Zeroing out the control register eliminates
-	 * stale valid bits from spurious NMIs and it resets the counter.
+	 * stale valid bits from spurious NMIs and it resets the counter.  This
+	 * is safe here because the counter is not yet enabled; the NMI re-arm
+	 * path must not do the same (Family 10h erratum #420).
 	 */
 	switch (ri) {
 	case IBS_PMC_FETCH:
 		wrmsr(IBS_FETCH_CTL, 0);
+		if (ibs_fetch_ctl2_supported)
+			wrmsr(IBS_FETCH_CTL2,
+			    pm->pm_md.pm_ibs.ibs_ctl2 & ~IBS_FETCH_CTL2_DISABLE);
 		config = pm->pm_md.pm_ibs.ibs_ctl | IBS_FETCH_CTL_ENABLE;
 		wrmsr(IBS_FETCH_CTL, config);
 		break;
 	case IBS_PMC_OP:
 		wrmsr(IBS_OP_CTL, 0);
+		if (ibs_op_ctl2_supported)
+			wrmsr(IBS_OP_CTL2,
+			    pm->pm_md.pm_ibs.ibs_ctl2 & ~IBS_OP_CTL2_DISABLE);
 		config = pm->pm_md.pm_ibs.ibs_ctl | IBS_OP_CTL_ENABLE;
 		wrmsr(IBS_OP_CTL, config);
 		break;
@@ -374,7 +507,8 @@ static int
 ibs_stop_pmc(int cpu __diagused, int ri, struct pmc *pm)
 {
 	int i;
-	uint64_t config;
+	uint64_t config, config2;
+	bool use_alt_disable;
 
 	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
 	    ("[ibs,%d] illegal CPU value %d", __LINE__, cpu));
@@ -394,23 +528,47 @@ ibs_stop_pmc(int cpu __diagused, int ri, struct pmc *pm)
 	 * are stopping and discard spurious NMIs.  We then retry clearing the
 	 * control register for 50us.  This gives us enough time and ensures
 	 * that the valid bit is not accidently stuck after a spurious NMI.
+	 *
+	 * On Zen 6 with the alternate disable bit (CPUID IbsDis), assert the
+	 * ctl2 DISABLE bit first.  This avoids an RMW hazard on ctl1, which the
+	 * processor may update concurrently while sampling.
 	 */
 	config = pm->pm_md.pm_ibs.ibs_ctl;
+	config2 = pm->pm_md.pm_ibs.ibs_ctl2;
+	use_alt_disable = (ibs_features & CPUID_IBSID_IBSDIS) != 0;
 
 	atomic_store_int(&ibs_pcpu[cpu]->pc_status, IBS_CPU_STOPPING);
 
+	/*
+	 * On Zen 6, ctl2 DISABLE is the authoritative stop switch; skip the
+	 * legacy MaxCnt-clearing write and clear the ctl1 ENABLE bit directly.
+	 */
 	switch (ri) {
 	case IBS_PMC_FETCH:
-		wrmsr(IBS_FETCH_CTL, config & ~IBS_FETCH_CTL_MAXCNTMASK);
-		DELAY(1);
-		config &= ~IBS_FETCH_CTL_ENABLE;
-		wrmsr(IBS_FETCH_CTL, config);
+		if (use_alt_disable) {
+			wrmsr(IBS_FETCH_CTL2,
+			    config2 | IBS_FETCH_CTL2_DISABLE);
+			wrmsr(IBS_FETCH_CTL, config & ~IBS_FETCH_CTL_ENABLE);
+		} else {
+			wrmsr(IBS_FETCH_CTL,
+			    config & ~IBS_FETCH_CTL_MAXCNTMASK);
+			DELAY(1);
+			config &= ~IBS_FETCH_CTL_ENABLE;
+			wrmsr(IBS_FETCH_CTL, config);
+		}
 		break;
 	case IBS_PMC_OP:
-		wrmsr(IBS_OP_CTL, config & ~IBS_OP_CTL_MAXCNTMASK);
-		DELAY(1);
-		config &= ~IBS_OP_CTL_ENABLE;
-		wrmsr(IBS_OP_CTL, config);
+		if (use_alt_disable) {
+			wrmsr(IBS_OP_CTL2,
+			    config2 | IBS_OP_CTL2_DISABLE);
+			wrmsr(IBS_OP_CTL, config & ~IBS_OP_CTL_ENABLE);
+		} else {
+			wrmsr(IBS_OP_CTL,
+			    config & ~IBS_OP_CTL_MAXCNTMASK);
+			DELAY(1);
+			config &= ~IBS_OP_CTL_ENABLE;
+			wrmsr(IBS_OP_CTL, config);
+		}
 		break;
 	}
 
@@ -420,9 +578,13 @@ ibs_stop_pmc(int cpu __diagused, int ri, struct pmc *pm)
 		switch (ri) {
 		case IBS_PMC_FETCH:
 			wrmsr(IBS_FETCH_CTL, 0);
+			if (ibs_fetch_ctl2_supported)
+				wrmsr(IBS_FETCH_CTL2, 0);
 			break;
 		case IBS_PMC_OP:
 			wrmsr(IBS_OP_CTL, 0);
+			if (ibs_op_ctl2_supported)
+				wrmsr(IBS_OP_CTL2, 0);
 			break;
 		}
 	}
@@ -456,6 +618,9 @@ pmc_ibs_process_fetch(struct pmc *pm, struct trapframe *tf, uint64_t config)
 		mpd.pl_mpdata[PMC_MPIDX_FETCH_PHYSADDR] =
 		    rdmsr(IBS_FETCH_PHYSADDR);
 	}
+	if (ibs_fetch_ctl2_supported) {
+		mpd.pl_mpdata[PMC_MPIDX_FETCH_CTL2] = rdmsr(IBS_FETCH_CTL2);
+	}
 
 	pmc_process_interrupt_mp(PMC_HR, pm, tf, &mpd);
 
@@ -490,6 +655,9 @@ pmc_ibs_process_op(struct pmc *pm, struct trapframe *tf, uint64_t config)
 	if ((ibs_features & CPUID_IBSID_IBSOPDATA4) != 0) {
 		mpd.pl_mpdata[PMC_MPIDX_OP_DATA4] = rdmsr(IBS_OP_DATA4);
 	}
+	if (ibs_op_ctl2_supported) {
+		mpd.pl_mpdata[PMC_MPIDX_OP_CTL2] = rdmsr(IBS_OP_CTL2);
+	}
 
 	pmc_process_interrupt_mp(PMC_HR, pm, tf, &mpd);
 
@@ -647,6 +815,10 @@ ibs_pcpu_fini(struct pmc_mdep *md, int cpu)
 	 */
 	wrmsr(IBS_FETCH_CTL, 0);
 	wrmsr(IBS_OP_CTL, 0);
+	if (ibs_fetch_ctl2_supported)
+		wrmsr(IBS_FETCH_CTL2, 0);
+	if (ibs_op_ctl2_supported)
+		wrmsr(IBS_OP_CTL2, 0);
 
 	/*
 	 * Free up allocated space.
diff --git a/sys/dev/hwpmc/hwpmc_ibs.h b/sys/dev/hwpmc/hwpmc_ibs.h
index 433397954d4f..d1474b7cba32 100644
--- a/sys/dev/hwpmc/hwpmc_ibs.h
+++ b/sys/dev/hwpmc/hwpmc_ibs.h
@@ -35,6 +35,9 @@
 /*
  * All of the CPUID definitions come from AMD PPR Vol 1 for AMD Family 1Ah
  * Model 02h C1 (57238) 2024-09-29 Revision 0.24.
+ * Zen 6 CPUID bits (IBSDIS, FETCHLATFILTERING, ADDRBIT63FILTERING) come from
+ * AMD64 Architecture Programmer's Manual Volume 2: System Programming (24593)
+ * 2025-07-02 Version 3.43.
  */
 #define	CPUID_IBSID			0x8000001B
 #define	CPUID_IBSID_IBSFFV		0x00000001 /* IBS Feature Flags Valid */
@@ -50,6 +53,12 @@
 #define	CPUID_IBSID_IBSOPDATA4		0x00000400 /* IBS OP DATA4 */
 #define	CPUID_IBSID_ZEN4IBSEXTENSIONS	0x00000800 /* IBS Zen 4 Extensions */
 #define	CPUID_IBSID_IBSLOADLATENCYFILT	0x00001000 /* Load Latency Filtering */
+#define	CPUID_IBSID_IBSDIS		0x00002000 /* Alternate IBS Disable */
+#define	CPUID_IBSID_FETCHLATFILTERING	0x00004000 /* Fetch Latency Filter */
+#define	CPUID_IBSID_ADDRBIT63FILTERING	0x00008000 /* Addr Bit 63 Filter */
+#define	CPUID_IBSID_STRMSTANDRMTSOCKET	0x00010000 /* StrmSt + RmtSocket */
+#define	CPUID_IBSID_BUFFERV1		0x00020000 /* IBS Buffering V1 */
+#define	CPUID_IBSID_MEMPROFILERV1	0x00040000 /* IBS Memory Profiler V1 */
 #define	CPUID_IBSID_IBSUPDTDDTLBSTATS	0x00080000 /* Simplified DTLB Stats */
 
 /*
@@ -107,11 +116,27 @@
 #define IBS_FETCH_PHYSADDR		0xC0011032 /* Fetch Physical Address */
 #define IBS_FETCH_EXTCTL		0xC001103C /* Fetch Control Extended */
 
+/* IBS Fetch Control 2 (Zen 6) */
+#define IBS_FETCH_CTL2			0xC001103F /* IBS Fetch Control 2 */
+#define IBS_FETCH_CTL2_DISABLE		(1ULL << 0) /* IBS Fetch Disable */
+#define IBS_FETCH_CTL2_LATFILTERMASK	(0xFULL << 1) /* Fetch Latency Filter */
+#define IBS_FETCH_CTL2_EXCLADDR63EQ1	(1ULL << 5) /* Exclude addr bit63=1 */
+#define IBS_FETCH_CTL2_EXCLADDR63EQ0	(1ULL << 6) /* Exclude addr bit63=0 */
+#define IBS_FETCH_CTL2_ADDR63MASK	(IBS_FETCH_CTL2_EXCLADDR63EQ0 | \
+    IBS_FETCH_CTL2_EXCLADDR63EQ1)
+
+#define IBS_FETCH_CTL2_LAT_MIN		128
+#define IBS_FETCH_CTL2_LAT_MAX		1920
+#define IBS_FETCH_CTL2_LAT_STEP		128
+#define IBS_FETCH_CTL2_LAT_TO_CTL(_l)	((((_l) >> 7) & 0xFULL) << 1)
+#define IBS_FETCH_CTL2_CTL_TO_LAT(_c)	((((_c) >> 1) & 0xFULL) << 7)
+
 #define PMC_MPIDX_FETCH_CTL		0
 #define PMC_MPIDX_FETCH_EXTCTL		1
 #define PMC_MPIDX_FETCH_LINADDR		2
 #define PMC_MPIDX_FETCH_PHYSADDR	3
-#define PMC_MPIDX_FETCH_MAX		(PMC_MPIDX_FETCH_PHYSADDR + 1)
+#define PMC_MPIDX_FETCH_CTL2		4
+#define PMC_MPIDX_FETCH_MAX		(PMC_MPIDX_FETCH_CTL2 + 1)
 
 /* IBS Execution Control */
 #define IBS_OP_CTL			0xC0011033 /* IBS Execution Control */
@@ -148,6 +173,8 @@
 #define IBS_OP_DATA_RETURN		(1ULL << 34) /* Return */
 
 #define IBS_OP_DATA2			0xC0011036 /* IBS Op Data 2 */
+#define IBS_OP_DATA2_RMTSOCKET		(1ULL << 9)  /* Remote Socket */
+#define IBS_OP_DATA2_STRMST		(1ULL << 8)  /* Streaming Store */
 #define IBS_OP_DATA3			0xC0011037 /* IBS Op Data 3 */
 #define IBS_OP_DATA3_DCPHYADDRVALID	(1ULL << 18) /* DC Physical Address */
 #define IBS_OP_DATA3_DCLINADDRVALID	(1ULL << 17) /* DC Linear Address */
@@ -169,6 +196,15 @@
 #define IBS_OP_DATA4			0xC001103D /* IBS Op Data 4 */
 #define IBS_OP_DATA4_LDRESYNC		(1ULL << 0)  /* Load Resync */
 
+/* IBS Execution Control 2 (Zen 6) */
+#define IBS_OP_CTL2			0xC001103E /* IBS Execution Control 2 */
+#define IBS_OP_CTL2_DISABLE		(1ULL << 0) /* IBS Execution Disable */
+#define IBS_OP_CTL2_EXCLRIP63EQ0	(1ULL << 1) /* Exclude RIP bit63=0 */
+#define IBS_OP_CTL2_EXCLRIP63EQ1	(1ULL << 2) /* Exclude RIP bit63=1 */
+#define IBS_OP_CTL2_STRMSTFILTER	(1ULL << 3) /* Streaming Store Filter */
+#define IBS_OP_CTL2_RIP63MASK		(IBS_OP_CTL2_EXCLRIP63EQ0 | \
+    IBS_OP_CTL2_EXCLRIP63EQ1)
+
 #define PMC_MPIDX_OP_CTL		0
 #define PMC_MPIDX_OP_RIP		1
 #define PMC_MPIDX_OP_DATA		2
@@ -178,7 +214,8 @@
 #define PMC_MPIDX_OP_DC_PHYSADDR	6
 #define PMC_MPIDX_OP_TGT_RIP		7
 #define PMC_MPIDX_OP_DATA4		8
-#define PMC_MPIDX_OP_MAX		(PMC_MPIDX_OP_DATA4 + 1)
+#define PMC_MPIDX_OP_CTL2		9
+#define PMC_MPIDX_OP_MAX		(PMC_MPIDX_OP_CTL2 + 1)
 
 /*
  * IBS data is encoded as using the multipart flag in the existing callchain
@@ -204,8 +241,8 @@ struct pmc_md_ibs_pmc {
 	uint64_t	ibs_ctl2;
 };
 
-#define IBS_PMC_CAPS			(PMC_CAP_INTERRUPT | PMC_CAP_SYSTEM | \
-	PMC_CAP_EDGE | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE)
+#define IBS_PMC_CAPS			(PMC_CAP_INTERRUPT | PMC_CAP_USER | \
+	PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE)
 
 int	pmc_ibs_initialize(struct pmc_mdep *md, int ncpu);
 void	pmc_ibs_finalize(struct pmc_mdep *md);
diff --git a/usr.sbin/pmcstat/pmcstat_log.c b/usr.sbin/pmcstat/pmcstat_log.c
index 9af2ccc4e365..77a6ea799594 100644
--- a/usr.sbin/pmcstat/pmcstat_log.c
+++ b/usr.sbin/pmcstat/pmcstat_log.c
@@ -371,10 +371,10 @@ pmcstat_pmcindex_to_pmcr(int pmcin)
 
 #if defined(__amd64__) || defined(__i386__)
 static void
-pmcstat_print_ibs_fetch(struct pmclog_ev_callchain *cc, int offset)
+pmcstat_print_ibs_fetch(struct pmclog_ev_callchain *cc, int offset, int len64)
 {
 	uint64_t *ibsbuf = (uint64_t *)&cc->pl_pc[offset];
-	uint64_t ctl;
+	uint64_t ctl, ctl2;
 
 	ctl = ibsbuf[PMC_MPIDX_FETCH_CTL];
 	PMCSTAT_PRINT_ENTRY("ibs-fetch", "%s%s%s%s",
@@ -390,15 +390,28 @@ pmcstat_print_ibs_fetch(struct pmclog_ev_callchain *cc, int offset)
 		PMCSTAT_PRINT_ENTRY("IBS", "Physical Address %" PRIx64,
 		    ibsbuf[PMC_MPIDX_FETCH_PHYSADDR]);
 	}
+	if (len64 > PMC_MPIDX_FETCH_CTL2) {
+		ctl2 = ibsbuf[PMC_MPIDX_FETCH_CTL2];
+		if ((ctl2 & IBS_FETCH_CTL2_EXCLADDR63EQ1) != 0)
+			PMCSTAT_PRINT_ENTRY("ibs-fetch", "addr63=0");
+		if ((ctl2 & IBS_FETCH_CTL2_EXCLADDR63EQ0) != 0)
+			PMCSTAT_PRINT_ENTRY("ibs-fetch", "addr63=1");
+		if ((ctl2 & IBS_FETCH_CTL2_LATFILTERMASK) != 0) {
+			PMCSTAT_PRINT_ENTRY("ibs-fetch",
+			    "fetchlat>=%" PRIu64,
+			    (uint64_t)IBS_FETCH_CTL2_CTL_TO_LAT(ctl2));
+		}
+	}
 }
 
 static void
-pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset)
+pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset, int len64)
 {
 	uint64_t *ibsbuf = (uint64_t *)&cc->pl_pc[offset];
-	uint64_t data, data3;
+	uint64_t data, data2, data3, ctl2;
 
 	data = ibsbuf[PMC_MPIDX_OP_DATA];
+	data2 = ibsbuf[PMC_MPIDX_OP_DATA2];
 	data3 = ibsbuf[PMC_MPIDX_OP_DATA3];
 
 	if ((data & IBS_OP_DATA_RIPINVALID) == 0) {
@@ -416,6 +429,11 @@ pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset)
 	    (data3 & IBS_OP_DATA3_LOCKEDOP) ? "lock " : "",
 	    (data3 & IBS_OP_DATA3_DCL1TLBMISS) ? "l1tlbmiss " : "",
 	    (data3 & IBS_OP_DATA3_DCMISS) ? "dcmiss " : "");
+	if ((data2 & (IBS_OP_DATA2_STRMST | IBS_OP_DATA2_RMTSOCKET)) != 0) {
+		PMCSTAT_PRINT_ENTRY("ibs-op", "%s%s",
+		    (data2 & IBS_OP_DATA2_STRMST) ? "streamstore " : "",
+		    (data2 & IBS_OP_DATA2_RMTSOCKET) ? "remotesocket" : "");
+	}
 	PMCSTAT_PRINT_ENTRY("ibs-op", "Latency %" PRIu64,
 	    IBS_OP_DATA3_TO_DCLAT(data3));
 	if ((data3 & IBS_OP_DATA3_DCLINADDRVALID) != 0) {
@@ -426,6 +444,15 @@ pmcstat_print_ibs_op(struct pmclog_ev_callchain *cc, int offset)
 		PMCSTAT_PRINT_ENTRY("ibs-op", "Physical Address %" PRIx64,
 		    ibsbuf[PMC_MPIDX_OP_DC_PHYSADDR]);
 	}
+	if (len64 > PMC_MPIDX_OP_CTL2) {
+		ctl2 = ibsbuf[PMC_MPIDX_OP_CTL2];
+		if ((ctl2 & IBS_OP_CTL2_EXCLRIP63EQ1) != 0)
+			PMCSTAT_PRINT_ENTRY("ibs-op", "addr63=0");
+		if ((ctl2 & IBS_OP_CTL2_EXCLRIP63EQ0) != 0)
+			PMCSTAT_PRINT_ENTRY("ibs-op", "addr63=1");
+		if ((ctl2 & IBS_OP_CTL2_STRMSTFILTER) != 0)
+			PMCSTAT_PRINT_ENTRY("ibs-op", "streamstore");
+	}
 }
 #endif
 
@@ -446,9 +473,11 @@ pmcstat_print_multipart(struct pmclog_ev_callchain *cc)
 			return (offset);
 #if defined(__amd64__) || defined(__i386__)
 		} else if (type == PMC_CC_MULTIPART_IBS_FETCH) {
-			pmcstat_print_ibs_fetch(cc, offset);
+			pmcstat_print_ibs_fetch(cc, offset,
+			    len / (sizeof(uint64_t) / sizeof(uintptr_t)));
 		} else if (type == PMC_CC_MULTIPART_IBS_OP) {
-			pmcstat_print_ibs_op(cc, offset);
+			pmcstat_print_ibs_op(cc, offset,
+			    len / (sizeof(uint64_t) / sizeof(uintptr_t)));
 #endif
 		} else {
 			PMCSTAT_PRINT_ENTRY("unsupported multipart type!");


home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?6a2ad064.272c3.4e5d56a>