From 317dd50e80775434a2ea593ad8522e728ed94e9d Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Fri, 18 Jul 2014 08:37:17 -0700 Subject: [PATCH 01/18] cpufreq: intel_pstate: Make intel_pstate_kobject and debugfs_parent locals Since we never remove sysfs entry and debugfs files, we can make the intel_pstate_kobject and debugfs_parent locals. Also, annotate with __init intel_pstate_sysfs_expose_params() and intel_pstate_debug_expose_params() in order to be freed after bootstrap. Signed-off-by: Stratos Karafotis Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 86631cb6f7de..601c428cab94 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -253,9 +253,9 @@ static struct pid_param pid_files[] = { {NULL, NULL} }; -static struct dentry *debugfs_parent; -static void intel_pstate_debug_expose_params(void) +static void __init intel_pstate_debug_expose_params(void) { + struct dentry *debugfs_parent; int i = 0; debugfs_parent = debugfs_create_dir("pstate_snb", NULL); @@ -342,10 +342,10 @@ static struct attribute *intel_pstate_attributes[] = { static struct attribute_group intel_pstate_attr_group = { .attrs = intel_pstate_attributes, }; -static struct kobject *intel_pstate_kobject; -static void intel_pstate_sysfs_expose_params(void) +static void __init intel_pstate_sysfs_expose_params(void) { + struct kobject *intel_pstate_kobject; int rc; intel_pstate_kobject = kobject_create_and_add("intel_pstate", From fa30dff9a81ea9fdc2e985a14fe14ce6393a3214 Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Fri, 18 Jul 2014 08:37:18 -0700 Subject: [PATCH 02/18] cpufreq: intel_pstate: Remove unnecessary type casting in div_s64() call div_s64() accepts the divisor parameter as s32. Helper div_fp() also accepts divisor as int32_t. So, remove the unnecessary int64_t type casting. Signed-off-by: Stratos Karafotis Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 601c428cab94..e5fd78072f8b 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -50,7 +50,7 @@ static inline int32_t mul_fp(int32_t x, int32_t y) static inline int32_t div_fp(int32_t x, int32_t y) { - return div_s64((int64_t)x << FRAC_BITS, (int64_t)y); + return div_s64((int64_t)x << FRAC_BITS, y); } struct sample { From 845c1cbef08c87d2a4e7ca3c82ac2363637fdcb9 Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Fri, 18 Jul 2014 08:37:19 -0700 Subject: [PATCH 03/18] cpufreq: intel_pstate: Add missing blank lines after declarations Also, remove unnecessary blank lines. Signed-off-by: Stratos Karafotis Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e5fd78072f8b..e9f3048eb6f2 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -37,7 +37,6 @@ #define BYT_TURBO_RATIOS 0x66c #define BYT_TURBO_VIDS 0x66d - #define FRAC_BITS 8 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) #define fp_toint(X) ((X) >> FRAC_BITS) @@ -167,7 +166,6 @@ static inline void pid_i_gain_set(struct _pid *pid, int percent) static inline void pid_d_gain_set(struct _pid *pid, int percent) { - pid->d_gain = div_fp(int_tofp(percent), int_tofp(100)); } @@ -217,6 +215,7 @@ static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu) static inline void intel_pstate_reset_all_pid(void) { unsigned int cpu; + for_each_online_cpu(cpu) { if (all_cpu_data[cpu]) intel_pstate_busy_pid_reset(all_cpu_data[cpu]); @@ -230,6 +229,7 @@ static int pid_param_set(void *data, u64 val) intel_pstate_reset_all_pid(); return 0; } + static int pid_param_get(void *data, u64 *val) { *val = *(u32 *)data; @@ -284,6 +284,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, { unsigned int input; int ret; + ret = sscanf(buf, "%u", &input); if (ret != 1) return -EINVAL; @@ -300,6 +301,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, { unsigned int input; int ret; + ret = sscanf(buf, "%u", &input); if (ret != 1) return -EINVAL; @@ -307,6 +309,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, limits.max_sysfs_pct = clamp_t(int, input, 0 , 100); limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); + return count; } @@ -315,6 +318,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, { unsigned int input; int ret; + ret = sscanf(buf, "%u", &input); if (ret != 1) return -EINVAL; @@ -360,6 +364,7 @@ static void __init intel_pstate_sysfs_expose_params(void) static int byt_get_min_pstate(void) { u64 value; + rdmsrl(BYT_RATIOS, value); return (value >> 8) & 0x7F; } @@ -367,6 +372,7 @@ static int byt_get_min_pstate(void) static int byt_get_max_pstate(void) { u64 value; + rdmsrl(BYT_RATIOS, value); return (value >> 16) & 0x7F; } @@ -374,6 +380,7 @@ static int byt_get_max_pstate(void) static int byt_get_turbo_pstate(void) { u64 value; + rdmsrl(BYT_TURBO_RATIOS, value); return value & 0x7F; } @@ -407,7 +414,6 @@ static void byt_get_vid(struct cpudata *cpudata) { u64 value; - rdmsrl(BYT_VIDS, value); cpudata->vid.min = int_tofp((value >> 8) & 0x7f); cpudata->vid.max = int_tofp((value >> 16) & 0x7f); @@ -420,10 +426,10 @@ static void byt_get_vid(struct cpudata *cpudata) cpudata->vid.turbo = value & 0x7f; } - static int core_get_min_pstate(void) { u64 value; + rdmsrl(MSR_PLATFORM_INFO, value); return (value >> 40) & 0xFF; } @@ -431,6 +437,7 @@ static int core_get_min_pstate(void) static int core_get_max_pstate(void) { u64 value; + rdmsrl(MSR_PLATFORM_INFO, value); return (value >> 8) & 0xFF; } @@ -439,6 +446,7 @@ static int core_get_turbo_pstate(void) { u64 value; int nont, ret; + rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value); nont = core_get_max_pstate(); ret = ((value) & 255); @@ -493,12 +501,12 @@ static struct cpu_defaults byt_params = { }, }; - static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) { int max_perf = cpu->pstate.turbo_pstate; int max_perf_adj; int min_perf; + if (limits.no_turbo) max_perf = cpu->pstate.max_pstate; From 2d8d1f18ed2b51bcc65a99bf940514b1a697b5f2 Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Fri, 18 Jul 2014 08:37:20 -0700 Subject: [PATCH 04/18] cpufreq: intel_pstate: Fit code in a single line where possible We can fit these lines in a single one, under the 80 characters limit. Signed-off-by: Stratos Karafotis Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e9f3048eb6f2..424c5a98771d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -205,11 +205,7 @@ static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu) pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct); pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct); - pid_reset(&cpu->pid, - pid_params.setpoint, - 100, - pid_params.deadband, - 0); + pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0); } static inline void intel_pstate_reset_all_pid(void) @@ -235,8 +231,7 @@ static int pid_param_get(void *data, u64 *val) *val = *(u32 *)data; return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, - pid_param_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n"); struct pid_param { char *name; @@ -355,8 +350,7 @@ static void __init intel_pstate_sysfs_expose_params(void) intel_pstate_kobject = kobject_create_and_add("intel_pstate", &cpu_subsys.dev_root->kobj); BUG_ON(!intel_pstate_kobject); - rc = sysfs_create_group(intel_pstate_kobject, - &intel_pstate_attr_group); + rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group); BUG_ON(rc); } @@ -515,8 +509,7 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf)); - *min = clamp_t(int, min_perf, - cpu->pstate.min_pstate, max_perf); + *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); } static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) @@ -713,8 +706,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum) init_timer_deferrable(&cpu->timer); cpu->timer.function = intel_pstate_timer_func; - cpu->timer.data = - (unsigned long)cpu; + cpu->timer.data = (unsigned long)cpu; cpu->timer.expires = jiffies + HZ/100; intel_pstate_busy_pid_reset(cpu); intel_pstate_sample(cpu); From 285cb99091fad1416958eb7d9fb8ecf7328d8bef Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Fri, 18 Jul 2014 08:37:21 -0700 Subject: [PATCH 05/18] cpufreq: intel_pstate: Cleanup parentheses Remove unnecessary parentheses. Also, add parentheses in one case for better readability. Signed-off-by: Stratos Karafotis Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 424c5a98771d..dee9e5882937 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -443,7 +443,7 @@ static int core_get_turbo_pstate(void) rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value); nont = core_get_max_pstate(); - ret = ((value) & 255); + ret = (value) & 255; if (ret <= nont) ret = nont; return ret; @@ -617,7 +617,7 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) current_pstate = int_tofp(cpu->pstate.current_pstate); core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); - sample_time = (pid_params.sample_rate_ms * USEC_PER_MSEC); + sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; duration_us = (u32) ktime_us_delta(cpu->sample.time, cpu->last_sample_time); if (duration_us > sample_time * 3) { @@ -751,7 +751,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100); limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); - limits.max_policy_pct = policy->max * 100 / policy->cpuinfo.max_freq; + limits.max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq; limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0 , 100); limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); @@ -763,8 +763,8 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy) { cpufreq_verify_within_cpu_limits(policy); - if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && - (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) + if (policy->policy != CPUFREQ_POLICY_POWERSAVE && + policy->policy != CPUFREQ_POLICY_PERFORMANCE) return -EINVAL; return 0; From abf013bffeb55744fb6d1ff9bb30acaabe6302ab Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Fri, 18 Jul 2014 08:37:22 -0700 Subject: [PATCH 06/18] cpufreq: intel_pstate: Remove unnecessary intermediate variable sample_time Remove the unnecessary intermediate assignment and use directly the pid_params.sample_rate_ms variable. Signed-off-by: Stratos Karafotis Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index dee9e5882937..e0ed078988c5 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -599,10 +599,9 @@ static inline void intel_pstate_sample(struct cpudata *cpu) static inline void intel_pstate_set_sample_time(struct cpudata *cpu) { - int sample_time, delay; + int delay; - sample_time = pid_params.sample_rate_ms; - delay = msecs_to_jiffies(sample_time); + delay = msecs_to_jiffies(pid_params.sample_rate_ms); mod_timer_pinned(&cpu->timer, jiffies + delay); } From c410833a3c96b325c68987c2544becad39079c33 Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Fri, 18 Jul 2014 08:37:23 -0700 Subject: [PATCH 07/18] cpufreq: intel_pstate: Align multiple lines to open parenthesis Suppress checkpatch.pl --strict warnings: CHECK: Alignment should match open parenthesis Signed-off-by: Stratos Karafotis Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e0ed078988c5..9d75bd6faf65 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -147,7 +147,7 @@ static struct perf_limits limits = { }; static inline void pid_reset(struct _pid *pid, int setpoint, int busy, - int deadband, int integral) { + int deadband, int integral) { pid->setpoint = setpoint; pid->deadband = deadband; pid->integral = int_tofp(integral); @@ -258,8 +258,8 @@ static void __init intel_pstate_debug_expose_params(void) return; while (pid_files[i].name) { debugfs_create_file(pid_files[i].name, 0660, - debugfs_parent, pid_files[i].value, - &fops_pid_param); + debugfs_parent, pid_files[i].value, + &fops_pid_param); i++; } } @@ -275,7 +275,7 @@ static void __init intel_pstate_debug_expose_params(void) } static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, - const char *buf, size_t count) + const char *buf, size_t count) { unsigned int input; int ret; @@ -292,7 +292,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, } static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, - const char *buf, size_t count) + const char *buf, size_t count) { unsigned int input; int ret; @@ -309,7 +309,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, } static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, - const char *buf, size_t count) + const char *buf, size_t count) { unsigned int input; int ret; @@ -618,10 +618,10 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; duration_us = (u32) ktime_us_delta(cpu->sample.time, - cpu->last_sample_time); + cpu->last_sample_time); if (duration_us > sample_time * 3) { sample_ratio = div_fp(int_tofp(sample_time), - int_tofp(duration_us)); + int_tofp(duration_us)); core_busy = mul_fp(core_busy, sample_ratio); } @@ -763,7 +763,7 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy) cpufreq_verify_within_cpu_limits(policy); if (policy->policy != CPUFREQ_POLICY_POWERSAVE && - policy->policy != CPUFREQ_POLICY_PERFORMANCE) + policy->policy != CPUFREQ_POLICY_PERFORMANCE) return -EINVAL; return 0; @@ -796,7 +796,7 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); if (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE || - cpu->pstate.max_pstate == cpu->pstate.turbo_pstate) { + cpu->pstate.max_pstate == cpu->pstate.turbo_pstate) { limits.turbo_disabled = 1; limits.no_turbo = 1; } @@ -838,8 +838,8 @@ static int intel_pstate_msrs_not_valid(void) rdmsrl(MSR_IA32_MPERF, mperf); if (!pstate_funcs.get_max() || - !pstate_funcs.get_min() || - !pstate_funcs.get_turbo()) + !pstate_funcs.get_min() || + !pstate_funcs.get_turbo()) return -ENODEV; rdmsrl(MSR_IA32_APERF, tmp); @@ -921,14 +921,14 @@ static bool intel_pstate_platform_pwr_mgmt_exists(void) struct acpi_table_header hdr; struct hw_vendor_info *v_info; - if (acpi_disabled - || ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr))) + if (acpi_disabled || + ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr))) return false; for (v_info = vendor_info; v_info->valid; v_info++) { - if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) - && !strncmp(hdr.oem_table_id, v_info->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) - && intel_pstate_no_acpi_pss()) + if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) && + !strncmp(hdr.oem_table_id, v_info->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) && + intel_pstate_no_acpi_pss()) return true; } From 4ab60c3f32c721e46217e762bcd3e55a8f659c04 Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Fri, 18 Jul 2014 08:37:24 -0700 Subject: [PATCH 08/18] cpufreq: intel_pstate: Disable interrupts during MSRs reading According to Intel 64 and IA-32 Architectures SDM, Volume 3, Chapter 14.2, "Software needs to exercise care to avoid delays between the two RDMSRs (for example interrupts)". So, disable interrupts during reading MSRs IA32_APERF and IA32_MPERF. This should increase the accuracy of the calculations. Signed-off-by: Stratos Karafotis Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9d75bd6faf65..ff3c5624972c 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -577,9 +577,12 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu) static inline void intel_pstate_sample(struct cpudata *cpu) { u64 aperf, mperf; + unsigned long flags; + local_irq_save(flags); rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); + local_irq_restore(flags); aperf = aperf >> FRAC_BITS; mperf = mperf >> FRAC_BITS; From ac658131d79e775efb0b819cc5a833e581d4de28 Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Fri, 18 Jul 2014 08:37:25 -0700 Subject: [PATCH 09/18] cpufreq: intel_pstate: Keep values in aperf/mperf in full precision Currently we shift right aperf and mperf variables by FRAC_BITS to prevent overflow when we convert them to fix point numbers (shift left by FRAC_BITS). But this is not necessary, because we actually use delta aperf and mperf which are much less than APERF and MPERF values. So, use the unmodified APERF and MPERF values in calculation. This also adds 8 bits in precision, although the gain is insignificant. Signed-off-by: Stratos Karafotis Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ff3c5624972c..129ffb2853dc 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -584,9 +584,6 @@ static inline void intel_pstate_sample(struct cpudata *cpu) rdmsrl(MSR_IA32_MPERF, mperf); local_irq_restore(flags); - aperf = aperf >> FRAC_BITS; - mperf = mperf >> FRAC_BITS; - cpu->last_sample_time = cpu->sample.time; cpu->sample.time = ktime_get(); cpu->sample.aperf = aperf; From 4b707c893d0937be9c7be437950a312fbaf47601 Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Fri, 18 Jul 2014 08:37:26 -0700 Subject: [PATCH 10/18] cpufreq: intel_pstate: Simplify P state adjustment logic. Simplify the code by removing the inline functions pstate_increase and pstate_decrease and use directly the intel_pstate_set_pstate. Signed-off-by: Stratos Karafotis Acked-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 129ffb2853dc..2ff85f665f93 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -530,21 +530,6 @@ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) pstate_funcs.set(cpu, pstate); } -static inline void intel_pstate_pstate_increase(struct cpudata *cpu, int steps) -{ - int target; - target = cpu->pstate.current_pstate + steps; - - intel_pstate_set_pstate(cpu, target); -} - -static inline void intel_pstate_pstate_decrease(struct cpudata *cpu, int steps) -{ - int target; - target = cpu->pstate.current_pstate - steps; - intel_pstate_set_pstate(cpu, target); -} - static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) { cpu->pstate.min_pstate = pstate_funcs.get_min(); @@ -632,20 +617,15 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) { int32_t busy_scaled; struct _pid *pid; - signed int ctl = 0; - int steps; + signed int ctl; pid = &cpu->pid; busy_scaled = intel_pstate_get_scaled_busy(cpu); ctl = pid_calc(pid, busy_scaled); - steps = abs(ctl); - - if (ctl < 0) - intel_pstate_pstate_increase(cpu, steps); - else - intel_pstate_pstate_decrease(cpu, steps); + /* Negative values of ctl increase the pstate and vice versa */ + intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl); } static void intel_pstate_timer_func(unsigned long __data) From 78e2708691e9289f97750eb71aca31b5a2973d94 Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Fri, 18 Jul 2014 08:37:27 -0700 Subject: [PATCH 11/18] cpufreq: intel_pstate: Remove core_pct rounding The specific rounding adds conditionally only 1/256 to fractional part of core_pct. We can safely remove it without any noticeable impact in calculations. Use div64_u64 instead of div_u64 to avoid possible overflow of sample->mperf as divisor Signed-off-by: Stratos Karafotis Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 2ff85f665f93..c5eac949760d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -545,13 +545,9 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu) { struct sample *sample = &cpu->sample; int64_t core_pct; - int32_t rem; core_pct = int_tofp(sample->aperf) * int_tofp(100); - core_pct = div_u64_rem(core_pct, int_tofp(sample->mperf), &rem); - - if ((rem << 1) >= int_tofp(sample->mperf)) - core_pct += 1; + core_pct = div64_u64(core_pct, int_tofp(sample->mperf)); sample->freq = fp_toint( mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct)); From 22d0628a226c9864fabd621ef2bf64c841219b46 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Fri, 20 Jun 2014 15:42:18 +0800 Subject: [PATCH 12/18] cpufreq: imx6: remove pu regulator dependency for SOCs with no PU regulator PU regulator is not a necessary regulator for cpufreq, not all i.MX6 SoCs have PU regulator, only if SOC has PU regulator, then its voltage must be equal to SOC regulator, so remove the dependency to support i.MX6SX which has no PU regulator. Signed-off-by: Anson Huang Acked-by: Viresh Kumar Acked-by: Shawn Guo Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/imx6q-cpufreq.c | 35 ++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index af366c21d4b4..c2d30765bf3d 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -66,10 +66,12 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) /* scaling up? scale voltage before frequency */ if (new_freq > old_freq) { - ret = regulator_set_voltage_tol(pu_reg, imx6_soc_volt[index], 0); - if (ret) { - dev_err(cpu_dev, "failed to scale vddpu up: %d\n", ret); - return ret; + if (!IS_ERR(pu_reg)) { + ret = regulator_set_voltage_tol(pu_reg, imx6_soc_volt[index], 0); + if (ret) { + dev_err(cpu_dev, "failed to scale vddpu up: %d\n", ret); + return ret; + } } ret = regulator_set_voltage_tol(soc_reg, imx6_soc_volt[index], 0); if (ret) { @@ -121,10 +123,12 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) dev_warn(cpu_dev, "failed to scale vddsoc down: %d\n", ret); ret = 0; } - ret = regulator_set_voltage_tol(pu_reg, imx6_soc_volt[index], 0); - if (ret) { - dev_warn(cpu_dev, "failed to scale vddpu down: %d\n", ret); - ret = 0; + if (!IS_ERR(pu_reg)) { + ret = regulator_set_voltage_tol(pu_reg, imx6_soc_volt[index], 0); + if (ret) { + dev_warn(cpu_dev, "failed to scale vddpu down: %d\n", ret); + ret = 0; + } } } @@ -182,9 +186,9 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) } arm_reg = regulator_get(cpu_dev, "arm"); - pu_reg = regulator_get(cpu_dev, "pu"); + pu_reg = regulator_get_optional(cpu_dev, "pu"); soc_reg = regulator_get(cpu_dev, "soc"); - if (IS_ERR(arm_reg) || IS_ERR(pu_reg) || IS_ERR(soc_reg)) { + if (IS_ERR(arm_reg) || IS_ERR(soc_reg)) { dev_err(cpu_dev, "failed to get regulators\n"); ret = -ENOENT; goto put_reg; @@ -268,9 +272,11 @@ soc_opp_out: ret = regulator_set_voltage_time(soc_reg, imx6_soc_volt[0], imx6_soc_volt[num - 1]); if (ret > 0) transition_latency += ret * 1000; - ret = regulator_set_voltage_time(pu_reg, imx6_soc_volt[0], imx6_soc_volt[num - 1]); - if (ret > 0) - transition_latency += ret * 1000; + if (!IS_ERR(pu_reg)) { + ret = regulator_set_voltage_time(pu_reg, imx6_soc_volt[0], imx6_soc_volt[num - 1]); + if (ret > 0) + transition_latency += ret * 1000; + } /* * OPP is maintained in order of increasing frequency, and @@ -327,7 +333,8 @@ static int imx6q_cpufreq_remove(struct platform_device *pdev) cpufreq_unregister_driver(&imx6q_cpufreq_driver); dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); regulator_put(arm_reg); - regulator_put(pu_reg); + if (!IS_ERR(pu_reg)) + regulator_put(pu_reg); regulator_put(soc_reg); clk_put(arm_clk); clk_put(pll1_sys_clk); From 5b0c0b16d48d20e26859907df4dd449e3b3c7f4c Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Mon, 30 Jun 2014 19:59:33 +0300 Subject: [PATCH 13/18] cpufreq: Introduce new relation for freq selection Introduce CPUFREQ_RELATION_C for frequency selection. It selects the frequency with the minimum euclidean distance to target. In case of equal distance between 2 frequencies, it will select the greater frequency. Signed-off-by: Stratos Karafotis Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/freq_table.c | 12 +++++++++++- include/linux/cpufreq.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 1632981c4b25..df14766a8e06 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -117,7 +117,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, .frequency = 0, }; struct cpufreq_frequency_table *pos; - unsigned int freq, i = 0; + unsigned int freq, diff, i = 0; pr_debug("request for target %u kHz (relation: %u) for cpu %u\n", target_freq, relation, policy->cpu); @@ -127,6 +127,7 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, suboptimal.frequency = ~0; break; case CPUFREQ_RELATION_L: + case CPUFREQ_RELATION_C: optimal.frequency = ~0; break; } @@ -168,6 +169,15 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, } } break; + case CPUFREQ_RELATION_C: + diff = abs(freq - target_freq); + if (diff < optimal.frequency || + (diff == optimal.frequency && + freq > table[optimal.driver_data].frequency)) { + optimal.frequency = diff; + optimal.driver_data = i; + } + break; } } if (optimal.driver_data > i) { diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 8f8ae95c6e27..7d1955afa62c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -176,6 +176,7 @@ static inline void disable_cpufreq(void) { } #define CPUFREQ_RELATION_L 0 /* lowest frequency at or above target */ #define CPUFREQ_RELATION_H 1 /* highest frequency below or at target */ +#define CPUFREQ_RELATION_C 2 /* closest frequency to target */ struct freq_attr { struct attribute attr; From 6393d6a1027ec1d69ec6246f6c7c2186f76c2abb Mon Sep 17 00:00:00 2001 From: Stratos Karafotis Date: Mon, 30 Jun 2014 19:59:34 +0300 Subject: [PATCH 14/18] cpufreq: ondemand: Eliminate the deadband effect Currently, ondemand calculates the target frequency proportional to load using the formula: Target frequency = C * load where C = policy->cpuinfo.max_freq / 100 Though, in many cases, the minimum available frequency is pretty high and the above calculation introduces a dead band from load 0 to 100 * policy->cpuinfo.min_freq / policy->cpuinfo.max_freq where the target frequency is always calculated to less than policy->cpuinfo.min_freq and the minimum frequency is selected. For example: on Intel i7-3770 @ 3.4GHz the policy->cpuinfo.min_freq = 1600000 and the policy->cpuinfo.max_freq = 3400000 (without turbo). Thus, the CPU starts to scale up at a load above 47. On quad core 1500MHz Krait the policy->cpuinfo.min_freq = 384000 and the policy->cpuinfo.max_freq = 1512000. Thus, the CPU starts to scale at load above 25. Change the calculation of target frequency to eliminate the above effect using the formula: Target frequency = A + B * load where A = policy->cpuinfo.min_freq and B = (policy->cpuinfo.max_freq - policy->cpuinfo->min_freq) / 100 This will map load values 0 to 100 linearly to cpuinfo.min_freq to cpuinfo.max_freq. Also, use the CPUFREQ_RELATION_C in __cpufreq_driver_target to select the closest frequency in frequency_table. This is necessary to avoid selection of minimum frequency only when load equals to 0. It will also help for selection of frequencies using a more 'fair' criterion. Tables below show the difference in selected frequency for specific values of load without and with this patch. On Intel i7-3770 @ 3.40GHz: Without With Load Target Selected Target Selected 0 0 1600000 1600000 1600000 5 170050 1600000 1690050 1700000 10 340100 1600000 1780100 1700000 15 510150 1600000 1870150 1900000 20 680200 1600000 1960200 2000000 25 850250 1600000 2050250 2100000 30 1020300 1600000 2140300 2100000 35 1190350 1600000 2230350 2200000 40 1360400 1600000 2320400 2400000 45 1530450 1600000 2410450 2400000 50 1700500 1900000 2500500 2500000 55 1870550 1900000 2590550 2600000 60 2040600 2100000 2680600 2600000 65 2210650 2400000 2770650 2800000 70 2380700 2400000 2860700 2800000 75 2550750 2600000 2950750 3000000 80 2720800 2800000 3040800 3000000 85 2890850 2900000 3130850 3100000 90 3060900 3100000 3220900 3300000 95 3230950 3300000 3310950 3300000 100 3401000 3401000 3401000 3401000 On ARM quad core 1500MHz Krait: Without With Load Target Selected Target Selected 0 0 384000 384000 384000 5 75600 384000 440400 486000 10 151200 384000 496800 486000 15 226800 384000 553200 594000 20 302400 384000 609600 594000 25 378000 384000 666000 702000 30 453600 486000 722400 702000 35 529200 594000 778800 810000 40 604800 702000 835200 810000 45 680400 702000 891600 918000 50 756000 810000 948000 918000 55 831600 918000 1004400 1026000 60 907200 918000 1060800 1026000 65 982800 1026000 1117200 1134000 70 1058400 1134000 1173600 1134000 75 1134000 1134000 1230000 1242000 80 1209600 1242000 1286400 1242000 85 1285200 1350000 1342800 1350000 90 1360800 1458000 1399200 1350000 95 1436400 1458000 1455600 1458000 100 1512000 1512000 1512000 1512000 Tested on Intel i7-3770 CPU @ 3.40GHz and on ARM quad core 1500MHz Krait (Android smartphone). Benchmarks on Intel i7 shows a performance improvement on low and medium work loads with lower power consumption. Specifics: Phoronix Linux Kernel Compilation 3.1: Time: -0.40%, energy: -0.07% Phoronix Apache: Time: -4.98%, energy: -2.35% Phoronix FFMPEG: Time: -6.29%, energy: -4.02% Also, running mp3 decoding (very low load) shows no differences with and without this patch. Signed-off-by: Stratos Karafotis Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_ondemand.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 18d409189092..ad3f38fd3eb9 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -170,21 +170,24 @@ static void od_check_cpu(int cpu, unsigned int load) dbs_freq_increase(policy, policy->max); } else { /* Calculate the next frequency proportional to load */ - unsigned int freq_next; - freq_next = load * policy->cpuinfo.max_freq / 100; + unsigned int freq_next, min_f, max_f; + + min_f = policy->cpuinfo.min_freq; + max_f = policy->cpuinfo.max_freq; + freq_next = min_f + load * (max_f - min_f) / 100; /* No longer fully busy, reset rate_mult */ dbs_info->rate_mult = 1; if (!od_tuners->powersave_bias) { __cpufreq_driver_target(policy, freq_next, - CPUFREQ_RELATION_L); + CPUFREQ_RELATION_C); return; } freq_next = od_ops.powersave_bias_target(policy, freq_next, CPUFREQ_RELATION_L); - __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); + __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C); } } From 853aa05aad49125d28ffbd4faf49a0db779f0f8a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 9 Jul 2014 17:19:04 -0400 Subject: [PATCH 15/18] powernow-k6: support 350MHz CPU There exists 350MHz K6-2E+ CPU, so add it to the usual frequency table. Signed-off-by: Mikulas Patocka Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernow-k6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c index c8012bc86910..f91027259c3c 100644 --- a/drivers/cpufreq/powernow-k6.c +++ b/drivers/cpufreq/powernow-k6.c @@ -55,6 +55,7 @@ static const struct { unsigned freq; unsigned mult; } usual_frequency_table[] = { + { 350000, 35 }, // 100 * 3.5 { 400000, 40 }, // 100 * 4 { 450000, 45 }, // 100 * 4.5 { 475000, 50 }, // 95 * 5 From 1461dc7d1c252fbeb5baee0c94b5b28b7966e125 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 17 Jul 2014 10:48:26 +0530 Subject: [PATCH 16/18] cpufreq: don't restore policy->cpus on failure to move kobj While hot-unplugging policy->cpu, we call cpufreq_nominate_new_policy_cpu() to nominate next owner of policy, i.e. policy->cpu. If we fail to move policy kobject under the new policy->cpu, we try to update policy->cpus with the old policy->cpu. This would have been required in case old-CPU is removed from policy->cpus in the first place. But its not done before calling cpufreq_nominate_new_policy_cpu(), but during the POST_DEAD notification which happens quite late in the hot-unplugging path. So, this is just some useless code hanging around, get rid of it. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6f024852c6fb..e572d51db945 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1322,11 +1322,6 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy, ret = kobject_move(&policy->kobj, &cpu_dev->kobj); if (ret) { pr_err("%s: Failed to move kobj: %d\n", __func__, ret); - - down_write(&policy->rwsem); - cpumask_set_cpu(old_cpu, policy->cpus); - up_write(&policy->rwsem); - ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj, "cpufreq"); From 41dfd908fc09e0e57f8f44fdcf543348cd5f249c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 17 Jul 2014 10:48:27 +0530 Subject: [PATCH 17/18] cpufreq: propagate error returned by kobject_move() We are returning -EINVAL instead of the error returned from kobject_move() when it fails. Propagate the actual error number. Also add a meaningful print when sysfs_create_link() fails. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e572d51db945..ec25ca607679 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1322,10 +1322,12 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy, ret = kobject_move(&policy->kobj, &cpu_dev->kobj); if (ret) { pr_err("%s: Failed to move kobj: %d\n", __func__, ret); - ret = sysfs_create_link(&cpu_dev->kobj, &policy->kobj, - "cpufreq"); - return -EINVAL; + if (sysfs_create_link(&cpu_dev->kobj, &policy->kobj, "cpufreq")) + pr_err("%s: Failed to restore kobj link to cpu:%d\n", + __func__, cpu_dev->id); + + return ret; } return cpu_dev->id; From 1bfb425b3b6bf30f5d4b851049b2057dce860bfd Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 17 Jul 2014 10:48:28 +0530 Subject: [PATCH 18/18] cpufreq: move policy kobj to update_policy_cpu() We are calling kobject_move() from two separate places currently and both these places share another routine update_policy_cpu() which is handling everything around updating policy->cpu. Moving ownership of policy->kobj also lies under the role of update_policy_cpu() routine and must be handled from there. So, Lets move kobject_move() to update_policy_cpu() and get rid of cpufreq_nominate_new_policy_cpu() as it doesn't have anything significant left. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 71 ++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 38 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index ec25ca607679..d9fdeddcef96 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1076,10 +1076,20 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) kfree(policy); } -static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) +static int update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu, + struct device *cpu_dev) { + int ret; + if (WARN_ON(cpu == policy->cpu)) - return; + return 0; + + /* Move kobject to the new policy->cpu */ + ret = kobject_move(&policy->kobj, &cpu_dev->kobj); + if (ret) { + pr_err("%s: Failed to move kobj: %d\n", __func__, ret); + return ret; + } down_write(&policy->rwsem); @@ -1090,6 +1100,8 @@ static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_UPDATE_POLICY_CPU, policy); + + return 0; } static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) @@ -1153,12 +1165,10 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) * the creation of a brand new one. So we need to perform this update * by invoking update_policy_cpu(). */ - if (recover_policy && cpu != policy->cpu) { - update_policy_cpu(policy, cpu); - WARN_ON(kobject_move(&policy->kobj, &dev->kobj)); - } else { + if (recover_policy && cpu != policy->cpu) + WARN_ON(update_policy_cpu(policy, cpu, dev)); + else policy->cpu = cpu; - } cpumask_copy(policy->cpus, cpumask_of(cpu)); @@ -1309,35 +1319,11 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return __cpufreq_add_dev(dev, sif); } -static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy, - unsigned int old_cpu) -{ - struct device *cpu_dev; - int ret; - - /* first sibling now owns the new sysfs dir */ - cpu_dev = get_cpu_device(cpumask_any_but(policy->cpus, old_cpu)); - - sysfs_remove_link(&cpu_dev->kobj, "cpufreq"); - ret = kobject_move(&policy->kobj, &cpu_dev->kobj); - if (ret) { - pr_err("%s: Failed to move kobj: %d\n", __func__, ret); - - if (sysfs_create_link(&cpu_dev->kobj, &policy->kobj, "cpufreq")) - pr_err("%s: Failed to restore kobj link to cpu:%d\n", - __func__, cpu_dev->id); - - return ret; - } - - return cpu_dev->id; -} - static int __cpufreq_remove_dev_prepare(struct device *dev, struct subsys_interface *sif) { unsigned int cpu = dev->id, cpus; - int new_cpu, ret; + int ret; unsigned long flags; struct cpufreq_policy *policy; @@ -1377,14 +1363,23 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, if (cpu != policy->cpu) { sysfs_remove_link(&dev->kobj, "cpufreq"); } else if (cpus > 1) { - new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu); - if (new_cpu >= 0) { - update_policy_cpu(policy, new_cpu); + /* Nominate new CPU */ + int new_cpu = cpumask_any_but(policy->cpus, cpu); + struct device *cpu_dev = get_cpu_device(new_cpu); - if (!cpufreq_suspended) - pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n", - __func__, new_cpu, cpu); + sysfs_remove_link(&cpu_dev->kobj, "cpufreq"); + ret = update_policy_cpu(policy, new_cpu, cpu_dev); + if (ret) { + if (sysfs_create_link(&cpu_dev->kobj, &policy->kobj, + "cpufreq")) + pr_err("%s: Failed to restore kobj link to cpu:%d\n", + __func__, cpu_dev->id); + return ret; } + + if (!cpufreq_suspended) + pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n", + __func__, new_cpu, cpu); } else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) { cpufreq_driver->stop_cpu(policy); }