Skip site navigation (1) | Skip section navigation (2)
Date:      Fri, 31 Jan 2020 17:40:42 +0000 (UTC)
From:      Conrad Meyer <cem@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r357336 - in head: share/man/man4 sys/x86/cpufreq
Message-ID:  <202001311740.00VHegIT085315@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: cem
Date: Fri Jan 31 17:40:41 2020
New Revision: 357336
URL: https://svnweb.freebsd.org/changeset/base/357336

Log:
  hwpstate(4): Ignore CurPstateLimit by default
  
  Add a sysctl knob to allow users to re-enable it, and document the knob and
  default in cpufreq.4.  (While here, add a few unrelated updates to
  cpufreq.4.)
  
  It seems that the register value in some hardware simply reflects the
  configured P-state.  This results in an inadvertent and unintended outcome
  where the P-state can only walk down, and then the driver becomes "stuck" in
  the slowest possible P-state.
  
  The Linux driver never consults this register, which is some evidence that
  ignoring its contents is relatively harmless.
  
  PR:		234733
  Reported by:	sigsys AT gmail.com, Erich Dollansky <freebsd.ed.lists AT
  		sumeritec.com>

Modified:
  head/share/man/man4/cpufreq.4
  head/sys/x86/cpufreq/hwpstate_amd.c

Modified: head/share/man/man4/cpufreq.4
==============================================================================
--- head/share/man/man4/cpufreq.4	Fri Jan 31 15:56:08 2020	(r357335)
+++ head/share/man/man4/cpufreq.4	Fri Jan 31 17:40:41 2020	(r357336)
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd January 22, 2020
+.Dd January 31, 2020
 .Dt CPUFREQ 4
 .Os
 .Sh NAME
@@ -73,9 +73,13 @@ has passed (e.g., the system has cooled sufficiently).
 If a sysctl cannot be set due to an override condition, it will return
 .Er EPERM .
 .Pp
-The frequency cannot be changed if TSC is in use as the timecounter.
+The frequency cannot be changed if TSC is in use as the timecounter and the
+hardware does not support invariant TSC.
 This is because the timecounter system needs to use a source that has a
 constant rate.
+(On invariant TSC hardware, the TSC runs at the P0 rate regardless of the
+configured P-state.)
+Modern hardware mostly has invariant TSC.
 The timecounter source can be changed with the
 .Pa kern.timecounter.hardware
 sysctl.
@@ -105,6 +109,15 @@ some systems.
 .It Va debug.cpufreq.verbose
 Print verbose messages.
 This setting is also accessible via a tunable with the same name.
+.It Va debug.hwpstate_pstate_limit
+If enabled, the AMD hwpstate driver limits administrative control of P-states
+(including by
+.Xr powerd 8 )
+to the value in the 0xc0010061 MSR, known as "PStateCurLim[CurPstateLimit]."
+It is disabled (0) by default.
+On some hardware, the limit register seems to simply follow the configured
+P-state, which results in the inability to ever raise the P-state back to P0
+from a reduced frequency state.
 .El
 .Sh SUPPORTED DRIVERS
 The following device drivers offer absolute frequency control via the
@@ -112,11 +125,15 @@ The following device drivers offer absolute frequency 
 interface.
 Usually, only one of these can be active at a time.
 .Pp
-.Bl -tag -compact -width ".Pa acpi_perf"
+.Bl -tag -compact -width ".Pa hwpstate_intel"
 .It Pa acpi_perf
 ACPI CPU performance states
 .It Pa est
 Intel Enhanced SpeedStep
+.It Pa hwpstate
+AMD Cool'n'Quiet2 used in K10 through Family 17h
+.It Pa hwpstate_intel
+Intel SpeedShift driver
 .It Pa ichss
 Intel SpeedStep for ICH
 .It Pa powernow

Modified: head/sys/x86/cpufreq/hwpstate_amd.c
==============================================================================
--- head/sys/x86/cpufreq/hwpstate_amd.c	Fri Jan 31 15:56:08 2020	(r357335)
+++ head/sys/x86/cpufreq/hwpstate_amd.c	Fri Jan 31 17:40:41 2020	(r357336)
@@ -131,6 +131,12 @@ static int	hwpstate_verify;
 SYSCTL_INT(_debug, OID_AUTO, hwpstate_verify, CTLFLAG_RWTUN,
     &hwpstate_verify, 0, "Verify P-state after setting");
 
+static bool	hwpstate_pstate_limit;
+SYSCTL_BOOL(_debug, OID_AUTO, hwpstate_pstate_limit, CTLFLAG_RWTUN,
+    &hwpstate_pstate_limit, 0,
+    "If enabled (1), limit administrative control of P-states to the value in "
+    "CurPstateLimit");
+
 static device_method_t hwpstate_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_identify,	hwpstate_identify),
@@ -161,7 +167,8 @@ static driver_t hwpstate_driver = {
 DRIVER_MODULE(hwpstate, cpu, hwpstate_driver, hwpstate_devclass, 0, 0);
 
 /*
- * Go to Px-state on all cpus considering the limit.
+ * Go to Px-state on all cpus, considering the limit register (if so
+ * configured).
  */
 static int
 hwpstate_goto_pstate(device_t dev, int id)
@@ -170,14 +177,15 @@ hwpstate_goto_pstate(device_t dev, int id)
 	uint64_t msr;
 	int cpu, i, j, limit;
 
-	/* get the current pstate limit */
-	msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
-	limit = AMD_10H_11H_GET_PSTATE_LIMIT(msr);
-	if (limit > id) {
-		HWPSTATE_DEBUG(dev,
-		    "Restricting requested P%d to P%d due to HW limit\n", id,
-		    limit);
-		id = limit;
+	if (hwpstate_pstate_limit) {
+		/* get the current pstate limit */
+		msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
+		limit = AMD_10H_11H_GET_PSTATE_LIMIT(msr);
+		if (limit > id) {
+			HWPSTATE_DEBUG(dev, "Restricting requested P%d to P%d "
+			    "due to HW limit\n", id, limit);
+			id = limit;
+		}
 	}
 
 	cpu = curcpu;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202001311740.00VHegIT085315>