diff --git a/apollolake-4.4.180/cpufreq_conservative.ko b/apollolake-4.4.180/cpufreq_conservative.ko new file mode 100644 index 000000000..a6856b0ab Binary files /dev/null and b/apollolake-4.4.180/cpufreq_conservative.ko differ diff --git a/apollolake-4.4.180/cpufreq_ondemand.ko b/apollolake-4.4.180/cpufreq_ondemand.ko new file mode 100644 index 000000000..3d780d4e8 Binary files /dev/null and b/apollolake-4.4.180/cpufreq_ondemand.ko differ diff --git a/apollolake-4.4.180/cpufreq_userspace.ko b/apollolake-4.4.180/cpufreq_userspace.ko new file mode 100644 index 000000000..c078c8724 Binary files /dev/null and b/apollolake-4.4.180/cpufreq_userspace.ko differ diff --git a/broadwell-4.4.180/cpufreq_conservative.ko b/broadwell-4.4.180/cpufreq_conservative.ko new file mode 100644 index 000000000..83d871102 Binary files /dev/null and b/broadwell-4.4.180/cpufreq_conservative.ko differ diff --git a/broadwell-4.4.180/cpufreq_ondemand.ko b/broadwell-4.4.180/cpufreq_ondemand.ko new file mode 100644 index 000000000..7b903ed6c Binary files /dev/null and b/broadwell-4.4.180/cpufreq_ondemand.ko differ diff --git a/broadwell-4.4.180/cpufreq_userspace.ko b/broadwell-4.4.180/cpufreq_userspace.ko new file mode 100644 index 000000000..07355af7b Binary files /dev/null and b/broadwell-4.4.180/cpufreq_userspace.ko differ diff --git a/broadwellnk-4.4.180/cpufreq_conservative.ko b/broadwellnk-4.4.180/cpufreq_conservative.ko new file mode 100644 index 000000000..c4511d8bc Binary files /dev/null and b/broadwellnk-4.4.180/cpufreq_conservative.ko differ diff --git a/broadwellnk-4.4.180/cpufreq_ondemand.ko b/broadwellnk-4.4.180/cpufreq_ondemand.ko new file mode 100644 index 000000000..ede2bb880 Binary files /dev/null and b/broadwellnk-4.4.180/cpufreq_ondemand.ko differ diff --git a/broadwellnk-4.4.180/cpufreq_userspace.ko b/broadwellnk-4.4.180/cpufreq_userspace.ko new file mode 100644 index 000000000..58f0c9060 Binary files /dev/null and b/broadwellnk-4.4.180/cpufreq_userspace.ko differ diff --git a/bromolow-3.10.108/cpufreq_conservative.ko b/bromolow-3.10.108/cpufreq_conservative.ko new file mode 100644 index 000000000..74960e1f9 Binary files /dev/null and b/bromolow-3.10.108/cpufreq_conservative.ko differ diff --git a/bromolow-3.10.108/cpufreq_ondemand.ko b/bromolow-3.10.108/cpufreq_ondemand.ko new file mode 100644 index 000000000..285ed7764 Binary files /dev/null and b/bromolow-3.10.108/cpufreq_ondemand.ko differ diff --git a/bromolow-3.10.108/cpufreq_userspace.ko b/bromolow-3.10.108/cpufreq_userspace.ko new file mode 100644 index 000000000..1ded78cd3 Binary files /dev/null and b/bromolow-3.10.108/cpufreq_userspace.ko differ diff --git a/denverton-4.4.180/cpufreq_conservative.ko b/denverton-4.4.180/cpufreq_conservative.ko new file mode 100644 index 000000000..471dc9dfa Binary files /dev/null and b/denverton-4.4.180/cpufreq_conservative.ko differ diff --git a/denverton-4.4.180/cpufreq_ondemand.ko b/denverton-4.4.180/cpufreq_ondemand.ko new file mode 100644 index 000000000..9527edf0f Binary files /dev/null and b/denverton-4.4.180/cpufreq_ondemand.ko differ diff --git a/denverton-4.4.180/cpufreq_userspace.ko b/denverton-4.4.180/cpufreq_userspace.ko new file mode 100644 index 000000000..8cac9daf3 Binary files /dev/null and b/denverton-4.4.180/cpufreq_userspace.ko differ diff --git a/epyc7002-5.10.55/scsi_transport_sas.ko b/epyc7002-5.10.55/scsi_transport_sas.ko new file mode 100644 index 000000000..667946361 Binary files /dev/null and b/epyc7002-5.10.55/scsi_transport_sas.ko differ diff --git a/geminilake-4.4.180/cpufreq_conservative.ko b/geminilake-4.4.180/cpufreq_conservative.ko new file mode 100644 index 000000000..517086242 Binary files /dev/null and b/geminilake-4.4.180/cpufreq_conservative.ko differ diff --git a/geminilake-4.4.180/cpufreq_ondemand.ko b/geminilake-4.4.180/cpufreq_ondemand.ko new file mode 100644 index 000000000..5f6d1d619 Binary files /dev/null and b/geminilake-4.4.180/cpufreq_ondemand.ko differ diff --git a/geminilake-4.4.180/cpufreq_userspace.ko b/geminilake-4.4.180/cpufreq_userspace.ko new file mode 100644 index 000000000..7cb41d743 Binary files /dev/null and b/geminilake-4.4.180/cpufreq_userspace.ko differ diff --git a/r1000-4.4.180/cpufreq_conservative.ko b/r1000-4.4.180/cpufreq_conservative.ko new file mode 100644 index 000000000..b5d5e48b4 Binary files /dev/null and b/r1000-4.4.180/cpufreq_conservative.ko differ diff --git a/r1000-4.4.180/cpufreq_ondemand.ko b/r1000-4.4.180/cpufreq_ondemand.ko new file mode 100644 index 000000000..0fd511dd1 Binary files /dev/null and b/r1000-4.4.180/cpufreq_ondemand.ko differ diff --git a/r1000-4.4.180/cpufreq_userspace.ko b/r1000-4.4.180/cpufreq_userspace.ko new file mode 100644 index 000000000..a63e5de7c Binary files /dev/null and b/r1000-4.4.180/cpufreq_userspace.ko differ diff --git a/src/.compile.sh b/src/.compile.sh index 26339d18c..962dbad59 100755 --- a/src/.compile.sh +++ b/src/.compile.sh @@ -24,11 +24,11 @@ function compile-module() { done < PLATFORMS } -if [ -f ../../arpl/PLATFORMS ]; then - cp ../../arpl/PLATFORMS PLATFORMS -else - curl -sLO "https://github.com/fbelavenuto/arpl/raw/main/PLATFORMS" -fi +#if [ -f ../../arpl/PLATFORMS ]; then +# cp ../../arpl/PLATFORMS PLATFORMS +#else +# curl -sLO "https://github.com/fbelavenuto/arpl/raw/main/PLATFORMS" +#fi if [ $# -ge 1 ]; then for A in $@; do diff --git a/src/PLATFORMS b/src/PLATFORMS index 5173e00d7..39b9e0f48 100644 --- a/src/PLATFORMS +++ b/src/PLATFORMS @@ -1,8 +1 @@ -bromolow 3.10.108 -apollolake 4.4.180 -broadwell 4.4.180 -broadwellnk 4.4.180 -denverton 4.4.180 -geminilake 4.4.180 -v1000 4.4.180 -r1000 4.4.180 +epyc7002 5.10.55 diff --git a/src/cpufreq/3.x/Makefile b/src/cpufreq/3.x/Makefile new file mode 100644 index 000000000..76985a5ee --- /dev/null +++ b/src/cpufreq/3.x/Makefile @@ -0,0 +1,39 @@ +# CPUfreq governors +#obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o +#obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o +obj-m += cpufreq_userspace.o +obj-m += cpufreq_ondemand.o +obj-m += cpufreq_conservative.o +#obj-$(CONFIG_CPU_FREQ_GOV_COMMON) += cpufreq_governor.o + +# CPUfreq cross-arch helpers +#obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o + +#obj-$(CONFIG_GENERIC_CPUFREQ_CPU0) += cpufreq-cpu0.o + +################################################################################## +# x86 drivers. +# Link order matters. K8 is preferred to ACPI because of firmware bugs in early +# K8 systems. This is still the case but acpi-cpufreq errors out so that +# powernow-k8 can load then. ACPI is preferred to all other hardware-specific drivers. +# speedstep-* is preferred over p4-clockmod. + +#obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o +#obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o +#obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o +#obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o +#obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o +#obj-$(CONFIG_X86_LONGHAUL) += longhaul.o +#obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o +#obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o +#obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o +#obj-$(CONFIG_X86_LONGRUN) += longrun.o +#obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o +#obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o +#obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o +#obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o +#obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o +#obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o +#obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o +#obj-m += intel_pstate.o +#obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o diff --git a/src/cpufreq/3.x/cpufreq_conservative.c b/src/cpufreq/3.x/cpufreq_conservative.c new file mode 100644 index 000000000..f34e81916 --- /dev/null +++ b/src/cpufreq/3.x/cpufreq_conservative.c @@ -0,0 +1,416 @@ +/* + * drivers/cpufreq/cpufreq_conservative.c + * + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . + * Jun Nakajima + * (C) 2009 Alexander Clouter + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cpufreq_governor.h" + +/* Conservative governor macros */ +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_FREQUENCY_DOWN_THRESHOLD (20) +#define DEF_FREQUENCY_STEP (5) +#define DEF_SAMPLING_DOWN_FACTOR (1) +#define MAX_SAMPLING_DOWN_FACTOR (10) + +static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); + +static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, + struct cpufreq_policy *policy) +{ + unsigned int freq_target = (cs_tuners->freq_step * policy->max) / 100; + + /* max freq cannot be less than 100. But who knows... */ + if (unlikely(freq_target == 0)) + freq_target = DEF_FREQUENCY_STEP; + + return freq_target; +} + +/* + * Every sampling_rate, we check, if current idle time is less than 20% + * (default), then we try to increase frequency. Every sampling_rate * + * sampling_down_factor, we check, if current idle time is more than 80% + * (default), then we try to decrease frequency + * + * Any frequency increase takes it to the maximum frequency. Frequency reduction + * happens at minimum steps of 5% (default) of maximum frequency + */ +static void cs_check_cpu(int cpu, unsigned int load) +{ + struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); + struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy; + struct dbs_data *dbs_data = policy->governor_data; + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + + /* + * break out if we 'cannot' reduce the speed as the user might + * want freq_step to be zero + */ + if (cs_tuners->freq_step == 0) + return; + + /* Check for frequency increase */ + if (load > cs_tuners->up_threshold) { + dbs_info->down_skip = 0; + + /* if we are already at full speed then break out early */ + if (dbs_info->requested_freq == policy->max) + return; + + dbs_info->requested_freq += get_freq_target(cs_tuners, policy); + if (dbs_info->requested_freq > policy->max) + dbs_info->requested_freq = policy->max; + + __cpufreq_driver_target(policy, dbs_info->requested_freq, + CPUFREQ_RELATION_H); + return; + } + + /* if sampling_down_factor is active break out early */ + if (++dbs_info->down_skip < cs_tuners->sampling_down_factor) + return; + dbs_info->down_skip = 0; + + /* Check for frequency decrease */ + if (load < cs_tuners->down_threshold) { + /* + * if we cannot reduce the frequency anymore, break out early + */ + if (policy->cur == policy->min) + return; + + dbs_info->requested_freq -= get_freq_target(cs_tuners, policy); + if (dbs_info->requested_freq < policy->min) + dbs_info->requested_freq = policy->min; + + __cpufreq_driver_target(policy, dbs_info->requested_freq, + CPUFREQ_RELATION_L); + return; + } +} + +static void cs_dbs_timer(struct work_struct *work) +{ + struct cs_cpu_dbs_info_s *dbs_info = container_of(work, + struct cs_cpu_dbs_info_s, cdbs.work.work); + unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; + struct cs_cpu_dbs_info_s *core_dbs_info = &per_cpu(cs_cpu_dbs_info, + cpu); + struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data; + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + int delay = delay_for_sampling_rate(cs_tuners->sampling_rate); + bool modify_all = true; + + mutex_lock(&core_dbs_info->cdbs.timer_mutex); + if (!need_load_eval(&core_dbs_info->cdbs, cs_tuners->sampling_rate)) + modify_all = false; + else + dbs_check_cpu(dbs_data, cpu); + + gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, delay, modify_all); + mutex_unlock(&core_dbs_info->cdbs.timer_mutex); +} + +static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + struct cs_cpu_dbs_info_s *dbs_info = + &per_cpu(cs_cpu_dbs_info, freq->cpu); + struct cpufreq_policy *policy; + + if (!dbs_info->enable) + return 0; + + policy = dbs_info->cdbs.cur_policy; + + /* + * we only care if our internally tracked freq moves outside the 'valid' + * ranges of frequency available to us otherwise we do not change it + */ + if (dbs_info->requested_freq > policy->max + || dbs_info->requested_freq < policy->min) + dbs_info->requested_freq = freq->new; + + return 0; +} + +/************************** sysfs interface ************************/ +static struct common_dbs_data cs_dbs_cdata; + +static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, + const char *buf, size_t count) +{ + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + return -EINVAL; + + cs_tuners->sampling_down_factor = input; + return count; +} + +static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + cs_tuners->sampling_rate = max(input, dbs_data->min_sampling_rate); + return count; +} + +static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold) + return -EINVAL; + + cs_tuners->up_threshold = input; + return count; +} + +static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + /* cannot be lower than 1 otherwise freq will not fall */ + if (ret != 1 || input < 1 || input > 100 || + input >= cs_tuners->up_threshold) + return -EINVAL; + + cs_tuners->down_threshold = input; + return count; +} + +static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, + const char *buf, size_t count) +{ + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + unsigned int input, j; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 1) + input = 1; + + if (input == cs_tuners->ignore_nice_load) /* nothing to do */ + return count; + + cs_tuners->ignore_nice_load = input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct cs_cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(cs_cpu_dbs_info, j); + dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->cdbs.prev_cpu_wall, 0); + if (cs_tuners->ignore_nice_load) + dbs_info->cdbs.prev_cpu_nice = + kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + } + return count; +} + +static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + if (input > 100) + input = 100; + + /* + * no need to test here if freq_step is zero as the user might actually + * want this, they would be crazy though :) + */ + cs_tuners->freq_step = input; + return count; +} + +show_store_one(cs, sampling_rate); +show_store_one(cs, sampling_down_factor); +show_store_one(cs, up_threshold); +show_store_one(cs, down_threshold); +show_store_one(cs, ignore_nice_load); +show_store_one(cs, freq_step); +declare_show_sampling_rate_min(cs); + +gov_sys_pol_attr_rw(sampling_rate); +gov_sys_pol_attr_rw(sampling_down_factor); +gov_sys_pol_attr_rw(up_threshold); +gov_sys_pol_attr_rw(down_threshold); +gov_sys_pol_attr_rw(ignore_nice_load); +gov_sys_pol_attr_rw(freq_step); +gov_sys_pol_attr_ro(sampling_rate_min); + +static struct attribute *dbs_attributes_gov_sys[] = { + &sampling_rate_min_gov_sys.attr, + &sampling_rate_gov_sys.attr, + &sampling_down_factor_gov_sys.attr, + &up_threshold_gov_sys.attr, + &down_threshold_gov_sys.attr, + &ignore_nice_load_gov_sys.attr, + &freq_step_gov_sys.attr, + NULL +}; + +static struct attribute_group cs_attr_group_gov_sys = { + .attrs = dbs_attributes_gov_sys, + .name = "conservative", +}; + +static struct attribute *dbs_attributes_gov_pol[] = { + &sampling_rate_min_gov_pol.attr, + &sampling_rate_gov_pol.attr, + &sampling_down_factor_gov_pol.attr, + &up_threshold_gov_pol.attr, + &down_threshold_gov_pol.attr, + &ignore_nice_load_gov_pol.attr, + &freq_step_gov_pol.attr, + NULL +}; + +static struct attribute_group cs_attr_group_gov_pol = { + .attrs = dbs_attributes_gov_pol, + .name = "conservative", +}; + +/************************** sysfs end ************************/ + +static int cs_init(struct dbs_data *dbs_data) +{ + struct cs_dbs_tuners *tuners; + + tuners = kzalloc(sizeof(struct cs_dbs_tuners), GFP_KERNEL); + if (!tuners) { + pr_err("%s: kzalloc failed\n", __func__); + return -ENOMEM; + } + + tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; + tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD; + tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; + tuners->ignore_nice_load = 0; + tuners->freq_step = DEF_FREQUENCY_STEP; + + dbs_data->tuners = tuners; + dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * + jiffies_to_usecs(10); + mutex_init(&dbs_data->mutex); + return 0; +} + +static void cs_exit(struct dbs_data *dbs_data) +{ + kfree(dbs_data->tuners); +} + +define_get_cpu_dbs_routines(cs_cpu_dbs_info); + +static struct notifier_block cs_cpufreq_notifier_block = { + .notifier_call = dbs_cpufreq_notifier, +}; + +static struct cs_ops cs_ops = { + .notifier_block = &cs_cpufreq_notifier_block, +}; + +static struct common_dbs_data cs_dbs_cdata = { + .governor = GOV_CONSERVATIVE, + .attr_group_gov_sys = &cs_attr_group_gov_sys, + .attr_group_gov_pol = &cs_attr_group_gov_pol, + .get_cpu_cdbs = get_cpu_cdbs, + .get_cpu_dbs_info_s = get_cpu_dbs_info_s, + .gov_dbs_timer = cs_dbs_timer, + .gov_check_cpu = cs_check_cpu, + .gov_ops = &cs_ops, + .init = cs_init, + .exit = cs_exit, +}; + +static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event); +} + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE +static +#endif +struct cpufreq_governor cpufreq_gov_conservative = { + .name = "conservative", + .governor = cs_cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + +static int __init cpufreq_gov_dbs_init(void) +{ + return cpufreq_register_governor(&cpufreq_gov_conservative); +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_conservative); +} + +MODULE_AUTHOR("Alexander Clouter "); +MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " + "Low Latency Frequency Transition capable processors " + "optimised for use in a battery environment"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE +fs_initcall(cpufreq_gov_dbs_init); +#else +module_init(cpufreq_gov_dbs_init); +#endif +module_exit(cpufreq_gov_dbs_exit); diff --git a/src/cpufreq/3.x/cpufreq_governor.h b/src/cpufreq/3.x/cpufreq_governor.h new file mode 100644 index 000000000..4a9058aeb --- /dev/null +++ b/src/cpufreq/3.x/cpufreq_governor.h @@ -0,0 +1,270 @@ +/* + * drivers/cpufreq/cpufreq_governor.h + * + * Header file for CPUFreq governors common code + * + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . + * (C) 2003 Jun Nakajima + * (C) 2009 Alexander Clouter + * (c) 2012 Viresh Kumar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _CPUFREQ_GOVERNOR_H +#define _CPUFREQ_GOVERNOR_H + +#include +#include +#include +#include +#include + +/* + * The polling frequency depends on the capability of the processor. Default + * polling frequency is 1000 times the transition latency of the processor. The + * governor will work on any processor with transition latency <= 10mS, using + * appropriate sampling rate. + * + * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) + * this governor will not work. All times here are in uS. + */ +#define MIN_SAMPLING_RATE_RATIO (2) +#define LATENCY_MULTIPLIER (1000) +#define MIN_LATENCY_MULTIPLIER (20) +#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) + +/* Ondemand Sampling types */ +enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; + +/* + * Macro for creating governors sysfs routines + * + * - gov_sys: One governor instance per whole system + * - gov_pol: One governor instance per policy + */ + +/* Create attributes */ +#define gov_sys_attr_ro(_name) \ +static struct global_attr _name##_gov_sys = \ +__ATTR(_name, 0444, show_##_name##_gov_sys, NULL) + +#define gov_sys_attr_rw(_name) \ +static struct global_attr _name##_gov_sys = \ +__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) + +#define gov_pol_attr_ro(_name) \ +static struct freq_attr _name##_gov_pol = \ +__ATTR(_name, 0444, show_##_name##_gov_pol, NULL) + +#define gov_pol_attr_rw(_name) \ +static struct freq_attr _name##_gov_pol = \ +__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) + +#define gov_sys_pol_attr_rw(_name) \ + gov_sys_attr_rw(_name); \ + gov_pol_attr_rw(_name) + +#define gov_sys_pol_attr_ro(_name) \ + gov_sys_attr_ro(_name); \ + gov_pol_attr_ro(_name) + +/* Create show/store routines */ +#define show_one(_gov, file_name) \ +static ssize_t show_##file_name##_gov_sys \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + struct _gov##_dbs_tuners *tuners = _gov##_dbs_cdata.gdbs_data->tuners; \ + return sprintf(buf, "%u\n", tuners->file_name); \ +} \ + \ +static ssize_t show_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, char *buf) \ +{ \ + struct dbs_data *dbs_data = policy->governor_data; \ + struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ + return sprintf(buf, "%u\n", tuners->file_name); \ +} + +#define store_one(_gov, file_name) \ +static ssize_t store_##file_name##_gov_sys \ +(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) \ +{ \ + struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ + return store_##file_name(dbs_data, buf, count); \ +} \ + \ +static ssize_t store_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, const char *buf, size_t count) \ +{ \ + struct dbs_data *dbs_data = policy->governor_data; \ + return store_##file_name(dbs_data, buf, count); \ +} + +#define show_store_one(_gov, file_name) \ +show_one(_gov, file_name); \ +store_one(_gov, file_name) + +/* create helper routines */ +#define define_get_cpu_dbs_routines(_dbs_info) \ +static struct cpu_dbs_common_info *get_cpu_cdbs(int cpu) \ +{ \ + return &per_cpu(_dbs_info, cpu).cdbs; \ +} \ + \ +static void *get_cpu_dbs_info_s(int cpu) \ +{ \ + return &per_cpu(_dbs_info, cpu); \ +} + +/* + * Abbreviations: + * dbs: used as a shortform for demand based switching It helps to keep variable + * names smaller, simpler + * cdbs: common dbs + * od_*: On-demand governor + * cs_*: Conservative governor + */ + +/* Per cpu structures */ +struct cpu_dbs_common_info { + int cpu; + u64 prev_cpu_idle; + u64 prev_cpu_wall; + u64 prev_cpu_nice; + struct cpufreq_policy *cur_policy; + struct delayed_work work; + /* + * percpu mutex that serializes governor limit change with gov_dbs_timer + * invocation. We do not want gov_dbs_timer to run when user is changing + * the governor or limits. + */ + struct mutex timer_mutex; + ktime_t time_stamp; +}; + +struct od_cpu_dbs_info_s { + struct cpu_dbs_common_info cdbs; + struct cpufreq_frequency_table *freq_table; + unsigned int freq_lo; + unsigned int freq_lo_jiffies; + unsigned int freq_hi_jiffies; + unsigned int rate_mult; + unsigned int sample_type:1; +}; + +struct cs_cpu_dbs_info_s { + struct cpu_dbs_common_info cdbs; + unsigned int down_skip; + unsigned int requested_freq; + unsigned int enable:1; +}; + +/* Per policy Governers sysfs tunables */ +struct od_dbs_tuners { + unsigned int ignore_nice_load; + unsigned int sampling_rate; + unsigned int sampling_down_factor; + unsigned int up_threshold; + unsigned int powersave_bias; + unsigned int io_is_busy; +}; + +struct cs_dbs_tuners { + unsigned int ignore_nice_load; + unsigned int sampling_rate; + unsigned int sampling_down_factor; + unsigned int up_threshold; + unsigned int down_threshold; + unsigned int freq_step; +}; + +/* Common Governer data across policies */ +struct dbs_data; +struct common_dbs_data { + /* Common across governors */ + #define GOV_ONDEMAND 0 + #define GOV_CONSERVATIVE 1 + int governor; + struct attribute_group *attr_group_gov_sys; /* one governor - system */ + struct attribute_group *attr_group_gov_pol; /* one governor - policy */ + + /* Common data for platforms that don't set have_governor_per_policy */ + struct dbs_data *gdbs_data; + + struct cpu_dbs_common_info *(*get_cpu_cdbs)(int cpu); + void *(*get_cpu_dbs_info_s)(int cpu); + void (*gov_dbs_timer)(struct work_struct *work); + void (*gov_check_cpu)(int cpu, unsigned int load); + int (*init)(struct dbs_data *dbs_data); + void (*exit)(struct dbs_data *dbs_data); + + /* Governor specific ops, see below */ + void *gov_ops; +}; + +/* Governer Per policy data */ +struct dbs_data { + struct common_dbs_data *cdata; + unsigned int min_sampling_rate; + int usage_count; + void *tuners; + + /* dbs_mutex protects dbs_enable in governor start/stop */ + struct mutex mutex; +}; + +/* Governor specific ops, will be passed to dbs_data->gov_ops */ +struct od_ops { + void (*powersave_bias_init_cpu)(int cpu); + unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, + unsigned int freq_next, unsigned int relation); + void (*freq_increase)(struct cpufreq_policy *p, unsigned int freq); +}; + +struct cs_ops { + struct notifier_block *notifier_block; +}; + +static inline int delay_for_sampling_rate(unsigned int sampling_rate) +{ + int delay = usecs_to_jiffies(sampling_rate); + + /* We want all CPUs to do sampling nearly on same jiffy */ + if (num_online_cpus() > 1) + delay -= jiffies % delay; + + return delay; +} + +#define declare_show_sampling_rate_min(_gov) \ +static ssize_t show_sampling_rate_min_gov_sys \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ + return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ +} \ + \ +static ssize_t show_sampling_rate_min_gov_pol \ +(struct cpufreq_policy *policy, char *buf) \ +{ \ + struct dbs_data *dbs_data = policy->governor_data; \ + return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ +} + +u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); +void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); +bool need_load_eval(struct cpu_dbs_common_info *cdbs, + unsigned int sampling_rate); +int cpufreq_governor_dbs(struct cpufreq_policy *policy, + struct common_dbs_data *cdata, unsigned int event); +void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, + unsigned int delay, bool all_cpus); +void od_register_powersave_bias_handler(unsigned int (*f) + (struct cpufreq_policy *, unsigned int, unsigned int), + unsigned int powersave_bias); +void od_unregister_powersave_bias_handler(void); +#endif /* _CPUFREQ_GOVERNOR_H */ diff --git a/src/cpufreq/3.x/cpufreq_ondemand.c b/src/cpufreq/3.x/cpufreq_ondemand.c new file mode 100644 index 000000000..25438bbf9 --- /dev/null +++ b/src/cpufreq/3.x/cpufreq_ondemand.c @@ -0,0 +1,642 @@ +/* + * drivers/cpufreq/cpufreq_ondemand.c + * + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . + * Jun Nakajima + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cpufreq_governor.h" + +/* On-demand governor macros */ +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_SAMPLING_DOWN_FACTOR (1) +#define MAX_SAMPLING_DOWN_FACTOR (100000) +#define MICRO_FREQUENCY_UP_THRESHOLD (95) +#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) +#define MIN_FREQUENCY_UP_THRESHOLD (11) +#define MAX_FREQUENCY_UP_THRESHOLD (100) + +static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); + +static struct od_ops od_ops; + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +static struct cpufreq_governor cpufreq_gov_ondemand; +#endif + +static unsigned int default_powersave_bias; + +static void ondemand_powersave_bias_init_cpu(int cpu) +{ + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + + dbs_info->freq_table = cpufreq_frequency_get_table(cpu); + dbs_info->freq_lo = 0; +} + +/* + * Not all CPUs want IO time to be accounted as busy; this depends on how + * efficient idling at a higher frequency/voltage is. + * Pavel Machek says this is not so for various generations of AMD and old + * Intel systems. + * Mike Chan (android.com) claims this is also not true for ARM. + * Because of this, whitelist specific known (series) of CPUs by default, and + * leave all others up to the user. + */ +static int should_io_be_busy(void) +{ +#if defined(CONFIG_X86) + /* + * For Intel, Core 2 (model 15) and later have an efficient idle. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model >= 15) + return 1; +#endif + return 0; +} + +/* + * Find right freq to be set now with powersave_bias on. + * Returns the freq_hi to be used right now and will set freq_hi_jiffies, + * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. + */ +static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, + unsigned int freq_next, unsigned int relation) +{ + unsigned int freq_req, freq_reduc, freq_avg; + unsigned int freq_hi, freq_lo; + unsigned int index = 0; + unsigned int jiffies_total, jiffies_hi, jiffies_lo; + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, + policy->cpu); + struct dbs_data *dbs_data = policy->governor_data; + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + + if (!dbs_info->freq_table) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_next; + } + + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, + relation, &index); + freq_req = dbs_info->freq_table[index].frequency; + freq_reduc = freq_req * od_tuners->powersave_bias / 1000; + freq_avg = freq_req - freq_reduc; + + /* Find freq bounds for freq_avg in freq_table */ + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_H, &index); + freq_lo = dbs_info->freq_table[index].frequency; + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_L, &index); + freq_hi = dbs_info->freq_table[index].frequency; + + /* Find out how long we have to be in hi and lo freqs */ + if (freq_hi == freq_lo) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_lo; + } + jiffies_total = usecs_to_jiffies(od_tuners->sampling_rate); + jiffies_hi = (freq_avg - freq_lo) * jiffies_total; + jiffies_hi += ((freq_hi - freq_lo) / 2); + jiffies_hi /= (freq_hi - freq_lo); + jiffies_lo = jiffies_total - jiffies_hi; + dbs_info->freq_lo = freq_lo; + dbs_info->freq_lo_jiffies = jiffies_lo; + dbs_info->freq_hi_jiffies = jiffies_hi; + return freq_hi; +} + +static void ondemand_powersave_bias_init(void) +{ + int i; + for_each_online_cpu(i) { + ondemand_powersave_bias_init_cpu(i); + } +} + +static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) +{ + struct dbs_data *dbs_data = p->governor_data; + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + + if (od_tuners->powersave_bias) + freq = od_ops.powersave_bias_target(p, freq, + CPUFREQ_RELATION_H); + else if (p->cur == p->max) + return; + + __cpufreq_driver_target(p, freq, od_tuners->powersave_bias ? + CPUFREQ_RELATION_L : CPUFREQ_RELATION_H); +} + +/* + * Every sampling_rate, we check, if current idle time is less than 20% + * (default), then we try to increase frequency. Else, we adjust the frequency + * proportional to load. + */ +static void od_check_cpu(int cpu, unsigned int load) +{ + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy; + struct dbs_data *dbs_data = policy->governor_data; + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + + dbs_info->freq_lo = 0; + + /* Check for frequency increase */ + if (load > od_tuners->up_threshold) { + /* If switching to max speed, apply sampling_down_factor */ + if (policy->cur < policy->max) + dbs_info->rate_mult = + od_tuners->sampling_down_factor; + dbs_freq_increase(policy, policy->max); + return; + } else { + /* Calculate the next frequency proportional to load */ + unsigned int freq_next; + freq_next = load * policy->cpuinfo.max_freq / 100; + + /* No longer fully busy, reset rate_mult */ + dbs_info->rate_mult = 1; + + if (freq_next < policy->min) + freq_next = policy->min; + + if (!od_tuners->powersave_bias) { + __cpufreq_driver_target(policy, freq_next, + CPUFREQ_RELATION_L); + return; + } + + freq_next = od_ops.powersave_bias_target(policy, freq_next, + CPUFREQ_RELATION_L); + __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); + } +} + +static void od_dbs_timer(struct work_struct *work) +{ + struct od_cpu_dbs_info_s *dbs_info = + container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work); + unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; + struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info, + cpu); + struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data; + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + int delay = 0, sample_type = core_dbs_info->sample_type; + bool modify_all = true; + + mutex_lock(&core_dbs_info->cdbs.timer_mutex); + if (!need_load_eval(&core_dbs_info->cdbs, od_tuners->sampling_rate)) { + modify_all = false; + goto max_delay; + } + + /* Common NORMAL_SAMPLE setup */ + core_dbs_info->sample_type = OD_NORMAL_SAMPLE; + if (sample_type == OD_SUB_SAMPLE) { + delay = core_dbs_info->freq_lo_jiffies; + __cpufreq_driver_target(core_dbs_info->cdbs.cur_policy, + core_dbs_info->freq_lo, CPUFREQ_RELATION_H); + } else { + dbs_check_cpu(dbs_data, cpu); + if (core_dbs_info->freq_lo) { + /* Setup timer for SUB_SAMPLE */ + core_dbs_info->sample_type = OD_SUB_SAMPLE; + delay = core_dbs_info->freq_hi_jiffies; + } + } + +max_delay: + if (!delay) + delay = delay_for_sampling_rate(od_tuners->sampling_rate + * core_dbs_info->rate_mult); + + gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, delay, modify_all); + mutex_unlock(&core_dbs_info->cdbs.timer_mutex); +} + +/************************** sysfs interface ************************/ +static struct common_dbs_data od_dbs_cdata; + +/** + * update_sampling_rate - update sampling rate effective immediately if needed. + * @new_rate: new sampling rate + * + * If new rate is smaller than the old, simply updating + * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the + * original sampling_rate was 1 second and the requested new sampling rate is 10 + * ms because the user needs immediate reaction from ondemand governor, but not + * sure if higher frequency will be required or not, then, the governor may + * change the sampling rate too late; up to 1 second later. Thus, if we are + * reducing the sampling rate, we need to make the new value effective + * immediately. + */ +static void update_sampling_rate(struct dbs_data *dbs_data, + unsigned int new_rate) +{ + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + int cpu; + + od_tuners->sampling_rate = new_rate = max(new_rate, + dbs_data->min_sampling_rate); + + for_each_online_cpu(cpu) { + struct cpufreq_policy *policy; + struct od_cpu_dbs_info_s *dbs_info; + unsigned long next_sampling, appointed_at; + + policy = cpufreq_cpu_get(cpu); + if (!policy) + continue; + if (policy->governor != &cpufreq_gov_ondemand) { + cpufreq_cpu_put(policy); + continue; + } + dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + cpufreq_cpu_put(policy); + + mutex_lock(&dbs_info->cdbs.timer_mutex); + + if (!delayed_work_pending(&dbs_info->cdbs.work)) { + mutex_unlock(&dbs_info->cdbs.timer_mutex); + continue; + } + + next_sampling = jiffies + usecs_to_jiffies(new_rate); + appointed_at = dbs_info->cdbs.work.timer.expires; + + if (time_before(next_sampling, appointed_at)) { + + mutex_unlock(&dbs_info->cdbs.timer_mutex); + cancel_delayed_work_sync(&dbs_info->cdbs.work); + mutex_lock(&dbs_info->cdbs.timer_mutex); + + gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, + usecs_to_jiffies(new_rate), true); + + } + mutex_unlock(&dbs_info->cdbs.timer_mutex); + } +} + +static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + update_sampling_rate(dbs_data, input); + return count; +} + +static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + unsigned int input; + int ret; + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + od_tuners->io_is_busy = !!input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, + j); + dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); + } + return count; +} + +static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || + input < MIN_FREQUENCY_UP_THRESHOLD) { + return -EINVAL; + } + + od_tuners->up_threshold = input; + return count; +} + +static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, + const char *buf, size_t count) +{ + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + unsigned int input, j; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + return -EINVAL; + od_tuners->sampling_down_factor = input; + + /* Reset down sampling multiplier in case it was active */ + for_each_online_cpu(j) { + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, + j); + dbs_info->rate_mult = 1; + } + return count; +} + +static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, + const char *buf, size_t count) +{ + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + unsigned int input; + int ret; + + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 1) + input = 1; + + if (input == od_tuners->ignore_nice_load) { /* nothing to do */ + return count; + } + od_tuners->ignore_nice_load = input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct od_cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); + if (od_tuners->ignore_nice_load) + dbs_info->cdbs.prev_cpu_nice = + kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + + } + return count; +} + +static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + if (input > 1000) + input = 1000; + + od_tuners->powersave_bias = input; + ondemand_powersave_bias_init(); + return count; +} + +show_store_one(od, sampling_rate); +show_store_one(od, io_is_busy); +show_store_one(od, up_threshold); +show_store_one(od, sampling_down_factor); +show_store_one(od, ignore_nice_load); +show_store_one(od, powersave_bias); +declare_show_sampling_rate_min(od); + +gov_sys_pol_attr_rw(sampling_rate); +gov_sys_pol_attr_rw(io_is_busy); +gov_sys_pol_attr_rw(up_threshold); +gov_sys_pol_attr_rw(sampling_down_factor); +gov_sys_pol_attr_rw(ignore_nice_load); +gov_sys_pol_attr_rw(powersave_bias); +gov_sys_pol_attr_ro(sampling_rate_min); + +static struct attribute *dbs_attributes_gov_sys[] = { + &sampling_rate_min_gov_sys.attr, + &sampling_rate_gov_sys.attr, + &up_threshold_gov_sys.attr, + &sampling_down_factor_gov_sys.attr, + &ignore_nice_load_gov_sys.attr, + &powersave_bias_gov_sys.attr, + &io_is_busy_gov_sys.attr, + NULL +}; + +static struct attribute_group od_attr_group_gov_sys = { + .attrs = dbs_attributes_gov_sys, + .name = "ondemand", +}; + +static struct attribute *dbs_attributes_gov_pol[] = { + &sampling_rate_min_gov_pol.attr, + &sampling_rate_gov_pol.attr, + &up_threshold_gov_pol.attr, + &sampling_down_factor_gov_pol.attr, + &ignore_nice_load_gov_pol.attr, + &powersave_bias_gov_pol.attr, + &io_is_busy_gov_pol.attr, + NULL +}; + +static struct attribute_group od_attr_group_gov_pol = { + .attrs = dbs_attributes_gov_pol, + .name = "ondemand", +}; + +/************************** sysfs end ************************/ + +static int od_init(struct dbs_data *dbs_data) +{ + struct od_dbs_tuners *tuners; + u64 idle_time; + int cpu; + + tuners = kzalloc(sizeof(struct od_dbs_tuners), GFP_KERNEL); + if (!tuners) { + pr_err("%s: kzalloc failed\n", __func__); + return -ENOMEM; + } + + cpu = get_cpu(); + idle_time = get_cpu_idle_time_us(cpu, NULL); + put_cpu(); + if (idle_time != -1ULL) { + /* Idle micro accounting is supported. Use finer thresholds */ + tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; + /* + * In nohz/micro accounting case we set the minimum frequency + * not depending on HZ, but fixed (very low). The deferred + * timer might skip some samples if idle/sleeping as needed. + */ + dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; + } else { + tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; + + /* For correct statistics, we need 10 ticks for each measure */ + dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * + jiffies_to_usecs(10); + } + + tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; + tuners->ignore_nice_load = 0; + tuners->powersave_bias = default_powersave_bias; + tuners->io_is_busy = should_io_be_busy(); + + dbs_data->tuners = tuners; + mutex_init(&dbs_data->mutex); + return 0; +} + +static void od_exit(struct dbs_data *dbs_data) +{ + kfree(dbs_data->tuners); +} + +define_get_cpu_dbs_routines(od_cpu_dbs_info); + +static struct od_ops od_ops = { + .powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu, + .powersave_bias_target = generic_powersave_bias_target, + .freq_increase = dbs_freq_increase, +}; + +static struct common_dbs_data od_dbs_cdata = { + .governor = GOV_ONDEMAND, + .attr_group_gov_sys = &od_attr_group_gov_sys, + .attr_group_gov_pol = &od_attr_group_gov_pol, + .get_cpu_cdbs = get_cpu_cdbs, + .get_cpu_dbs_info_s = get_cpu_dbs_info_s, + .gov_dbs_timer = od_dbs_timer, + .gov_check_cpu = od_check_cpu, + .gov_ops = &od_ops, + .init = od_init, + .exit = od_exit, +}; + +static void od_set_powersave_bias(unsigned int powersave_bias) +{ + struct cpufreq_policy *policy; + struct dbs_data *dbs_data; + struct od_dbs_tuners *od_tuners; + unsigned int cpu; + cpumask_t done; + + default_powersave_bias = powersave_bias; + cpumask_clear(&done); + + get_online_cpus(); + for_each_online_cpu(cpu) { + if (cpumask_test_cpu(cpu, &done)) + continue; + + policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.cur_policy; + if (!policy) + continue; + + cpumask_or(&done, &done, policy->cpus); + + if (policy->governor != &cpufreq_gov_ondemand) + continue; + + dbs_data = policy->governor_data; + od_tuners = dbs_data->tuners; + od_tuners->powersave_bias = default_powersave_bias; + } + put_online_cpus(); +} + +void od_register_powersave_bias_handler(unsigned int (*f) + (struct cpufreq_policy *, unsigned int, unsigned int), + unsigned int powersave_bias) +{ + od_ops.powersave_bias_target = f; + od_set_powersave_bias(powersave_bias); +} +EXPORT_SYMBOL_GPL(od_register_powersave_bias_handler); + +void od_unregister_powersave_bias_handler(void) +{ + od_ops.powersave_bias_target = generic_powersave_bias_target; + od_set_powersave_bias(0); +} +EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); + +static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); +} + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +static +#endif +struct cpufreq_governor cpufreq_gov_ondemand = { + .name = "ondemand", + .governor = od_cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + +static int __init cpufreq_gov_dbs_init(void) +{ + return cpufreq_register_governor(&cpufreq_gov_ondemand); +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_ondemand); +} + +MODULE_AUTHOR("Venkatesh Pallipadi "); +MODULE_AUTHOR("Alexey Starikovskiy "); +MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " + "Low Latency Frequency Transition capable processors"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +fs_initcall(cpufreq_gov_dbs_init); +#else +module_init(cpufreq_gov_dbs_init); +#endif +module_exit(cpufreq_gov_dbs_exit); diff --git a/src/cpufreq/3.x/cpufreq_userspace.c b/src/cpufreq/3.x/cpufreq_userspace.c new file mode 100644 index 000000000..bbeb9c072 --- /dev/null +++ b/src/cpufreq/3.x/cpufreq_userspace.c @@ -0,0 +1,222 @@ + +/* + * linux/drivers/cpufreq/cpufreq_userspace.c + * + * Copyright (C) 2001 Russell King + * (C) 2002 - 2004 Dominik Brodowski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * A few values needed by the userspace governor + */ +static DEFINE_PER_CPU(unsigned int, cpu_max_freq); +static DEFINE_PER_CPU(unsigned int, cpu_min_freq); +static DEFINE_PER_CPU(unsigned int, cpu_cur_freq); /* current CPU freq */ +static DEFINE_PER_CPU(unsigned int, cpu_set_freq); /* CPU freq desired by + userspace */ +static DEFINE_PER_CPU(unsigned int, cpu_is_managed); + +static DEFINE_MUTEX(userspace_mutex); +static int cpus_using_userspace_governor; + +/* keep track of frequency transitions */ +static int +userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + + if (!per_cpu(cpu_is_managed, freq->cpu)) + return 0; + + if (val == CPUFREQ_POSTCHANGE) { + pr_debug("saving cpu_cur_freq of cpu %u to be %u kHz\n", + freq->cpu, freq->new); + per_cpu(cpu_cur_freq, freq->cpu) = freq->new; + } + + return 0; +} + +static struct notifier_block userspace_cpufreq_notifier_block = { + .notifier_call = userspace_cpufreq_notifier +}; + + +/** + * cpufreq_set - set the CPU frequency + * @policy: pointer to policy struct where freq is being set + * @freq: target frequency in kHz + * + * Sets the CPU frequency to freq. + */ +static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq) +{ + int ret = -EINVAL; + + pr_debug("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq); + + mutex_lock(&userspace_mutex); + if (!per_cpu(cpu_is_managed, policy->cpu)) + goto err; + + per_cpu(cpu_set_freq, policy->cpu) = freq; + + if (freq < per_cpu(cpu_min_freq, policy->cpu)) + freq = per_cpu(cpu_min_freq, policy->cpu); + if (freq > per_cpu(cpu_max_freq, policy->cpu)) + freq = per_cpu(cpu_max_freq, policy->cpu); + + /* + * We're safe from concurrent calls to ->target() here + * as we hold the userspace_mutex lock. If we were calling + * cpufreq_driver_target, a deadlock situation might occur: + * A: cpufreq_set (lock userspace_mutex) -> + * cpufreq_driver_target(lock policy->lock) + * B: cpufreq_set_policy(lock policy->lock) -> + * __cpufreq_governor -> + * cpufreq_governor_userspace (lock userspace_mutex) + */ + ret = __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L); + + err: + mutex_unlock(&userspace_mutex); + return ret; +} + + +static ssize_t show_speed(struct cpufreq_policy *policy, char *buf) +{ + return sprintf(buf, "%u\n", per_cpu(cpu_cur_freq, policy->cpu)); +} + +static int cpufreq_governor_userspace(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int cpu = policy->cpu; + int rc = 0; + + switch (event) { + case CPUFREQ_GOV_START: + BUG_ON(!policy->cur); + mutex_lock(&userspace_mutex); + + if (cpus_using_userspace_governor == 0) { + cpufreq_register_notifier( + &userspace_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + cpus_using_userspace_governor++; + + per_cpu(cpu_is_managed, cpu) = 1; + per_cpu(cpu_min_freq, cpu) = policy->min; + per_cpu(cpu_max_freq, cpu) = policy->max; + per_cpu(cpu_cur_freq, cpu) = policy->cur; + per_cpu(cpu_set_freq, cpu) = policy->cur; + pr_debug("managing cpu %u started " + "(%u - %u kHz, currently %u kHz)\n", + cpu, + per_cpu(cpu_min_freq, cpu), + per_cpu(cpu_max_freq, cpu), + per_cpu(cpu_cur_freq, cpu)); + + mutex_unlock(&userspace_mutex); + break; + case CPUFREQ_GOV_STOP: + mutex_lock(&userspace_mutex); + cpus_using_userspace_governor--; + if (cpus_using_userspace_governor == 0) { + cpufreq_unregister_notifier( + &userspace_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + + per_cpu(cpu_is_managed, cpu) = 0; + per_cpu(cpu_min_freq, cpu) = 0; + per_cpu(cpu_max_freq, cpu) = 0; + per_cpu(cpu_set_freq, cpu) = 0; + pr_debug("managing cpu %u stopped\n", cpu); + mutex_unlock(&userspace_mutex); + break; + case CPUFREQ_GOV_LIMITS: + mutex_lock(&userspace_mutex); + pr_debug("limit event for cpu %u: %u - %u kHz, " + "currently %u kHz, last set to %u kHz\n", + cpu, policy->min, policy->max, + per_cpu(cpu_cur_freq, cpu), + per_cpu(cpu_set_freq, cpu)); + if (policy->max < per_cpu(cpu_set_freq, cpu)) { + __cpufreq_driver_target(policy, policy->max, + CPUFREQ_RELATION_H); + } else if (policy->min > per_cpu(cpu_set_freq, cpu)) { + __cpufreq_driver_target(policy, policy->min, + CPUFREQ_RELATION_L); + } else { + __cpufreq_driver_target(policy, + per_cpu(cpu_set_freq, cpu), + CPUFREQ_RELATION_L); + } + per_cpu(cpu_min_freq, cpu) = policy->min; + per_cpu(cpu_max_freq, cpu) = policy->max; + per_cpu(cpu_cur_freq, cpu) = policy->cur; + mutex_unlock(&userspace_mutex); + break; + } + return rc; +} + + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE +static +#endif +struct cpufreq_governor cpufreq_gov_userspace = { + .name = "userspace", + .governor = cpufreq_governor_userspace, + .store_setspeed = cpufreq_set, + .show_setspeed = show_speed, + .owner = THIS_MODULE, +}; + +static int __init cpufreq_gov_userspace_init(void) +{ + return cpufreq_register_governor(&cpufreq_gov_userspace); +} + + +static void __exit cpufreq_gov_userspace_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_userspace); +} + + +MODULE_AUTHOR("Dominik Brodowski , " + "Russell King "); +MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE +fs_initcall(cpufreq_gov_userspace_init); +#else +module_init(cpufreq_gov_userspace_init); +#endif +module_exit(cpufreq_gov_userspace_exit); diff --git a/src/cpufreq/3.x/intel_pstate.c b/src/cpufreq/3.x/intel_pstate.c new file mode 100644 index 000000000..decf84e71 --- /dev/null +++ b/src/cpufreq/3.x/intel_pstate.c @@ -0,0 +1,764 @@ +/* + * intel_pstate.c: Native P state management for Intel processors + * + * (C) Copyright 2012 Intel Corporation + * Author: Dirk Brandewie + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define SAMPLE_COUNT 3 + +#define FRAC_BITS 8 +#define int_tofp(X) ((int64_t)(X) << FRAC_BITS) +#define fp_toint(X) ((X) >> FRAC_BITS) + +static inline int32_t mul_fp(int32_t x, int32_t y) +{ + return ((int64_t)x * (int64_t)y) >> FRAC_BITS; +} + +static inline int32_t div_fp(int32_t x, int32_t y) +{ + return div_s64((int64_t)x << FRAC_BITS, (int64_t)y); +} + +struct sample { + int32_t core_pct_busy; + u64 aperf; + u64 mperf; + int freq; +}; + +struct pstate_data { + int current_pstate; + int min_pstate; + int max_pstate; + int turbo_pstate; +}; + +struct _pid { + int setpoint; + int32_t integral; + int32_t p_gain; + int32_t i_gain; + int32_t d_gain; + int deadband; + int32_t last_err; +}; + +struct cpudata { + int cpu; + + char name[64]; + + struct timer_list timer; + + struct pstate_adjust_policy *pstate_policy; + struct pstate_data pstate; + struct _pid pid; + + int min_pstate_count; + + u64 prev_aperf; + u64 prev_mperf; + int sample_ptr; + struct sample samples[SAMPLE_COUNT]; +}; + +static struct cpudata **all_cpu_data; +struct pstate_adjust_policy { + int sample_rate_ms; + int deadband; + int setpoint; + int p_gain_pct; + int d_gain_pct; + int i_gain_pct; +}; + +static struct pstate_adjust_policy default_policy = { + .sample_rate_ms = 10, + .deadband = 0, + .setpoint = 97, + .p_gain_pct = 20, + .d_gain_pct = 0, + .i_gain_pct = 0, +}; + +struct perf_limits { + int no_turbo; + int max_perf_pct; + int min_perf_pct; + int32_t max_perf; + int32_t min_perf; + int max_policy_pct; + int max_sysfs_pct; +}; + +static struct perf_limits limits = { + .no_turbo = 0, + .max_perf_pct = 100, + .max_perf = int_tofp(1), + .min_perf_pct = 0, + .min_perf = 0, + .max_policy_pct = 100, + .max_sysfs_pct = 100, +}; + +static inline void pid_reset(struct _pid *pid, int setpoint, int busy, + int deadband, int integral) { + pid->setpoint = setpoint; + pid->deadband = deadband; + pid->integral = int_tofp(integral); + pid->last_err = setpoint - busy; +} + +static inline void pid_p_gain_set(struct _pid *pid, int percent) +{ + pid->p_gain = div_fp(int_tofp(percent), int_tofp(100)); +} + +static inline void pid_i_gain_set(struct _pid *pid, int percent) +{ + pid->i_gain = div_fp(int_tofp(percent), int_tofp(100)); +} + +static inline void pid_d_gain_set(struct _pid *pid, int percent) +{ + + pid->d_gain = div_fp(int_tofp(percent), int_tofp(100)); +} + +static signed int pid_calc(struct _pid *pid, int32_t busy) +{ + signed int result; + int32_t pterm, dterm, fp_error; + int32_t integral_limit; + + fp_error = int_tofp(pid->setpoint) - busy; + + if (abs(fp_error) <= int_tofp(pid->deadband)) + return 0; + + pterm = mul_fp(pid->p_gain, fp_error); + + pid->integral += fp_error; + + /* limit the integral term */ + integral_limit = int_tofp(30); + if (pid->integral > integral_limit) + pid->integral = integral_limit; + if (pid->integral < -integral_limit) + pid->integral = -integral_limit; + + dterm = mul_fp(pid->d_gain, fp_error - pid->last_err); + pid->last_err = fp_error; + + result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm; + + return (signed int)fp_toint(result); +} + +static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu) +{ + pid_p_gain_set(&cpu->pid, cpu->pstate_policy->p_gain_pct); + pid_d_gain_set(&cpu->pid, cpu->pstate_policy->d_gain_pct); + pid_i_gain_set(&cpu->pid, cpu->pstate_policy->i_gain_pct); + + pid_reset(&cpu->pid, + cpu->pstate_policy->setpoint, + 100, + cpu->pstate_policy->deadband, + 0); +} + +static inline void intel_pstate_reset_all_pid(void) +{ + unsigned int cpu; + for_each_online_cpu(cpu) { + if (all_cpu_data[cpu]) + intel_pstate_busy_pid_reset(all_cpu_data[cpu]); + } +} + +/************************** debugfs begin ************************/ +static int pid_param_set(void *data, u64 val) +{ + *(u32 *)data = val; + intel_pstate_reset_all_pid(); + return 0; +} +static int pid_param_get(void *data, u64 *val) +{ + *val = *(u32 *)data; + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, + pid_param_set, "%llu\n"); + +struct pid_param { + char *name; + void *value; +}; + +static struct pid_param pid_files[] = { + {"sample_rate_ms", &default_policy.sample_rate_ms}, + {"d_gain_pct", &default_policy.d_gain_pct}, + {"i_gain_pct", &default_policy.i_gain_pct}, + {"deadband", &default_policy.deadband}, + {"setpoint", &default_policy.setpoint}, + {"p_gain_pct", &default_policy.p_gain_pct}, + {NULL, NULL} +}; + +static struct dentry *debugfs_parent; +static void intel_pstate_debug_expose_params(void) +{ + int i = 0; + + debugfs_parent = debugfs_create_dir("pstate_snb", NULL); + if (IS_ERR_OR_NULL(debugfs_parent)) + return; + while (pid_files[i].name) { + debugfs_create_file(pid_files[i].name, 0660, + debugfs_parent, pid_files[i].value, + &fops_pid_param); + i++; + } +} + +/************************** debugfs end ************************/ + +/************************** sysfs begin ************************/ +#define show_one(file_name, object) \ + static ssize_t show_##file_name \ + (struct kobject *kobj, struct attribute *attr, char *buf) \ + { \ + return sprintf(buf, "%u\n", limits.object); \ + } + +static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + limits.no_turbo = clamp_t(int, input, 0 , 1); + + return count; +} + +static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + limits.max_sysfs_pct = clamp_t(int, input, 0 , 100); + limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); + limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); + return count; +} + +static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + limits.min_perf_pct = clamp_t(int, input, 0 , 100); + limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); + + return count; +} + +show_one(no_turbo, no_turbo); +show_one(max_perf_pct, max_perf_pct); +show_one(min_perf_pct, min_perf_pct); + +define_one_global_rw(no_turbo); +define_one_global_rw(max_perf_pct); +define_one_global_rw(min_perf_pct); + +static struct attribute *intel_pstate_attributes[] = { + &no_turbo.attr, + &max_perf_pct.attr, + &min_perf_pct.attr, + NULL +}; + +static struct attribute_group intel_pstate_attr_group = { + .attrs = intel_pstate_attributes, +}; +static struct kobject *intel_pstate_kobject; + +static void intel_pstate_sysfs_expose_params(void) +{ + int rc; + + intel_pstate_kobject = kobject_create_and_add("intel_pstate", + &cpu_subsys.dev_root->kobj); + BUG_ON(!intel_pstate_kobject); + rc = sysfs_create_group(intel_pstate_kobject, + &intel_pstate_attr_group); + BUG_ON(rc); +} + +/************************** sysfs end ************************/ + +static int intel_pstate_min_pstate(void) +{ + u64 value; + rdmsrl(MSR_PLATFORM_INFO, value); + return (value >> 40) & 0xFF; +} + +static int intel_pstate_max_pstate(void) +{ + u64 value; + rdmsrl(MSR_PLATFORM_INFO, value); + return (value >> 8) & 0xFF; +} + +static int intel_pstate_turbo_pstate(void) +{ + u64 value; + int nont, ret; + rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value); + nont = intel_pstate_max_pstate(); + ret = ((value) & 255); + if (ret <= nont) + ret = nont; + return ret; +} + +static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) +{ + int max_perf = cpu->pstate.turbo_pstate; + int max_perf_adj; + int min_perf; + if (limits.no_turbo) + max_perf = cpu->pstate.max_pstate; + + max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf)); + *max = clamp_t(int, max_perf_adj, + cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); + + min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf)); + *min = clamp_t(int, min_perf, + cpu->pstate.min_pstate, max_perf); +} + +static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) +{ + int max_perf, min_perf; + + intel_pstate_get_min_max(cpu, &min_perf, &max_perf); + + pstate = clamp_t(int, pstate, min_perf, max_perf); + + if (pstate == cpu->pstate.current_pstate) + return; + + trace_cpu_frequency(pstate * 100000, cpu->cpu); + + cpu->pstate.current_pstate = pstate; + if (limits.no_turbo) + wrmsrl(MSR_IA32_PERF_CTL, BIT(32) | (pstate << 8)); + else + wrmsrl(MSR_IA32_PERF_CTL, pstate << 8); + +} + +static inline void intel_pstate_pstate_increase(struct cpudata *cpu, int steps) +{ + int target; + target = cpu->pstate.current_pstate + steps; + + intel_pstate_set_pstate(cpu, target); +} + +static inline void intel_pstate_pstate_decrease(struct cpudata *cpu, int steps) +{ + int target; + target = cpu->pstate.current_pstate - steps; + intel_pstate_set_pstate(cpu, target); +} + +static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) +{ + sprintf(cpu->name, "Intel 2nd generation core"); + + cpu->pstate.min_pstate = intel_pstate_min_pstate(); + cpu->pstate.max_pstate = intel_pstate_max_pstate(); + cpu->pstate.turbo_pstate = intel_pstate_turbo_pstate(); + + /* + * goto max pstate so we don't slow up boot if we are built-in if we are + * a module we will take care of it during normal operation + */ + intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate); +} + +static inline void intel_pstate_calc_busy(struct cpudata *cpu, + struct sample *sample) +{ + u64 core_pct; + core_pct = div64_u64(int_tofp(sample->aperf * 100), + sample->mperf); + sample->freq = fp_toint(cpu->pstate.max_pstate * core_pct * 1000); + + sample->core_pct_busy = core_pct; +} + +static inline void intel_pstate_sample(struct cpudata *cpu) +{ + u64 aperf, mperf; + + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT; + cpu->samples[cpu->sample_ptr].aperf = aperf; + cpu->samples[cpu->sample_ptr].mperf = mperf; + cpu->samples[cpu->sample_ptr].aperf -= cpu->prev_aperf; + cpu->samples[cpu->sample_ptr].mperf -= cpu->prev_mperf; + + intel_pstate_calc_busy(cpu, &cpu->samples[cpu->sample_ptr]); + + cpu->prev_aperf = aperf; + cpu->prev_mperf = mperf; +} + +static inline void intel_pstate_set_sample_time(struct cpudata *cpu) +{ + int sample_time, delay; + + sample_time = cpu->pstate_policy->sample_rate_ms; + delay = msecs_to_jiffies(sample_time); + mod_timer_pinned(&cpu->timer, jiffies + delay); +} + +static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) +{ + int32_t core_busy, max_pstate, current_pstate; + + core_busy = cpu->samples[cpu->sample_ptr].core_pct_busy; + max_pstate = int_tofp(cpu->pstate.max_pstate); + current_pstate = int_tofp(cpu->pstate.current_pstate); + return mul_fp(core_busy, div_fp(max_pstate, current_pstate)); +} + +static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) +{ + int32_t busy_scaled; + struct _pid *pid; + signed int ctl = 0; + int steps; + + pid = &cpu->pid; + busy_scaled = intel_pstate_get_scaled_busy(cpu); + + ctl = pid_calc(pid, busy_scaled); + + steps = abs(ctl); + if (ctl < 0) + intel_pstate_pstate_increase(cpu, steps); + else + intel_pstate_pstate_decrease(cpu, steps); +} + +static void intel_pstate_timer_func(unsigned long __data) +{ + struct cpudata *cpu = (struct cpudata *) __data; + + intel_pstate_sample(cpu); + intel_pstate_adjust_busy_pstate(cpu); + + if (cpu->pstate.current_pstate == cpu->pstate.min_pstate) { + cpu->min_pstate_count++; + if (!(cpu->min_pstate_count % 5)) { + intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate); + } + } else + cpu->min_pstate_count = 0; + + intel_pstate_set_sample_time(cpu); +} + +#define ICPU(model, policy) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\ + (unsigned long)&policy } + +static const struct x86_cpu_id intel_pstate_cpu_ids[] = { + ICPU(0x2a, default_policy), + ICPU(0x2d, default_policy), + ICPU(0x3a, default_policy), + ICPU(0x3c, default_policy), + ICPU(0x3e, default_policy), + ICPU(0x3f, default_policy), + ICPU(0x45, default_policy), + ICPU(0x46, default_policy), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); + +static int intel_pstate_init_cpu(unsigned int cpunum) +{ + + const struct x86_cpu_id *id; + struct cpudata *cpu; + + id = x86_match_cpu(intel_pstate_cpu_ids); + if (!id) + return -ENODEV; + + all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL); + if (!all_cpu_data[cpunum]) + return -ENOMEM; + + cpu = all_cpu_data[cpunum]; + + intel_pstate_get_cpu_pstates(cpu); + if (!cpu->pstate.current_pstate) { + all_cpu_data[cpunum] = NULL; + kfree(cpu); + return -ENODATA; + } + + cpu->cpu = cpunum; + cpu->pstate_policy = + (struct pstate_adjust_policy *)id->driver_data; + init_timer_deferrable(&cpu->timer); + cpu->timer.function = intel_pstate_timer_func; + cpu->timer.data = + (unsigned long)cpu; + cpu->timer.expires = jiffies + HZ/100; + intel_pstate_busy_pid_reset(cpu); + intel_pstate_sample(cpu); + intel_pstate_set_pstate(cpu, cpu->pstate.max_pstate); + + add_timer_on(&cpu->timer, cpunum); + + pr_info("Intel pstate controlling: cpu %d\n", cpunum); + + return 0; +} + +static unsigned int intel_pstate_get(unsigned int cpu_num) +{ + struct sample *sample; + struct cpudata *cpu; + + cpu = all_cpu_data[cpu_num]; + if (!cpu) + return 0; + sample = &cpu->samples[cpu->sample_ptr]; + return sample->freq; +} + +static int intel_pstate_set_policy(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + + cpu = all_cpu_data[policy->cpu]; + + if (!policy->cpuinfo.max_freq) + return -ENODEV; + + if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + limits.min_perf_pct = 100; + limits.min_perf = int_tofp(1); + limits.max_policy_pct = 100; + limits.max_perf_pct = 100; + limits.max_perf = int_tofp(1); + limits.no_turbo = 0; + return 0; + } + limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq; + limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100); + limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); + + limits.max_policy_pct = policy->max * 100 / policy->cpuinfo.max_freq; + limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0 , 100); + limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct); + limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); + + return 0; +} + +static int intel_pstate_verify_policy(struct cpufreq_policy *policy) +{ + cpufreq_verify_within_limits(policy, + policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); + + if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && + (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) + return -EINVAL; + + return 0; +} + +static int __cpuinit intel_pstate_cpu_exit(struct cpufreq_policy *policy) +{ + int cpu = policy->cpu; + + del_timer(&all_cpu_data[cpu]->timer); + kfree(all_cpu_data[cpu]); + all_cpu_data[cpu] = NULL; + return 0; +} + +static int __cpuinit intel_pstate_cpu_init(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + int rc; + + rc = intel_pstate_init_cpu(policy->cpu); + if (rc) + return rc; + + cpu = all_cpu_data[policy->cpu]; + + if (!limits.no_turbo && + limits.min_perf_pct == 100 && limits.max_perf_pct == 100) + policy->policy = CPUFREQ_POLICY_PERFORMANCE; + else + policy->policy = CPUFREQ_POLICY_POWERSAVE; + + policy->min = cpu->pstate.min_pstate * 100000; + policy->max = cpu->pstate.turbo_pstate * 100000; + + /* cpuinfo and default policy values */ + policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000; + policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + cpumask_set_cpu(policy->cpu, policy->cpus); + + return 0; +} + +static struct cpufreq_driver intel_pstate_driver = { + .flags = CPUFREQ_CONST_LOOPS, + .verify = intel_pstate_verify_policy, + .setpolicy = intel_pstate_set_policy, + .get = intel_pstate_get, + .init = intel_pstate_cpu_init, + .exit = intel_pstate_cpu_exit, + .name = "intel_pstate", + .owner = THIS_MODULE, +}; + +static int __initdata no_load; + +static int intel_pstate_msrs_not_valid(void) +{ + /* Check that all the msr's we are using are valid. */ + u64 aperf, mperf, tmp; + + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + + if (!intel_pstate_min_pstate() || + !intel_pstate_max_pstate() || + !intel_pstate_turbo_pstate()) + return -ENODEV; + + rdmsrl(MSR_IA32_APERF, tmp); + if (!(tmp - aperf)) + return -ENODEV; + + rdmsrl(MSR_IA32_MPERF, tmp); + if (!(tmp - mperf)) + return -ENODEV; + + return 0; +} +static int __init intel_pstate_init(void) +{ + int cpu, rc = 0; + const struct x86_cpu_id *id; + + if (no_load) + return -ENODEV; + + id = x86_match_cpu(intel_pstate_cpu_ids); + if (!id) + return -ENODEV; + + if (intel_pstate_msrs_not_valid()) + return -ENODEV; + + pr_info("Intel P-state driver initializing.\n"); + + all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); + if (!all_cpu_data) + return -ENOMEM; + + rc = cpufreq_register_driver(&intel_pstate_driver); + if (rc) + goto out; + + intel_pstate_debug_expose_params(); + intel_pstate_sysfs_expose_params(); + return rc; +out: + get_online_cpus(); + for_each_online_cpu(cpu) { + if (all_cpu_data[cpu]) { + del_timer_sync(&all_cpu_data[cpu]->timer); + kfree(all_cpu_data[cpu]); + } + } + + put_online_cpus(); + vfree(all_cpu_data); + return -ENODEV; +} +device_initcall(intel_pstate_init); + +static int __init intel_pstate_setup(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "disable")) + no_load = 1; + return 0; +} +early_param("intel_pstate", intel_pstate_setup); + +MODULE_AUTHOR("Dirk Brandewie "); +MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors"); +MODULE_LICENSE("GPL"); diff --git a/src/cpufreq/4.x/Makefile b/src/cpufreq/4.x/Makefile new file mode 100644 index 000000000..1876cbefd --- /dev/null +++ b/src/cpufreq/4.x/Makefile @@ -0,0 +1,37 @@ +# CPUfreq governors +#obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o +#obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o +obj-m += cpufreq_userspace.o +obj-m += cpufreq_ondemand.o +obj-m += cpufreq_conservative.o +#obj-$(CONFIG_CPU_FREQ_GOV_COMMON) += cpufreq_governor.o + +#obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o + +################################################################################## +# x86 drivers. +# Link order matters. K8 is preferred to ACPI because of firmware bugs in early +# K8 systems. This is still the case but acpi-cpufreq errors out so that +# powernow-k8 can load then. ACPI is preferred to all other hardware-specific drivers. +# speedstep-* is preferred over p4-clockmod. + +#obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o +#obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o +#obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o +#obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o +#obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o +#obj-$(CONFIG_X86_LONGHAUL) += longhaul.o +#obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o +#obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o +#obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o +#obj-$(CONFIG_X86_LONGRUN) += longrun.o +#obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o +#obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o +#obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o +#obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o +#obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o +#obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o +#obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o +#obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o +#obj-m += intel_pstate.o +#obj-$(CONFIG_X86_SFI_CPUFREQ) += sfi-cpufreq.o diff --git a/src/cpufreq/4.x/cpufreq_conservative.c b/src/cpufreq/4.x/cpufreq_conservative.c new file mode 100644 index 000000000..c395f9198 --- /dev/null +++ b/src/cpufreq/4.x/cpufreq_conservative.c @@ -0,0 +1,406 @@ +/* + * drivers/cpufreq/cpufreq_conservative.c + * + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . + * Jun Nakajima + * (C) 2009 Alexander Clouter + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include "cpufreq_governor.h" + +/* Conservative governor macros */ +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_FREQUENCY_DOWN_THRESHOLD (20) +#define DEF_FREQUENCY_STEP (5) +#define DEF_SAMPLING_DOWN_FACTOR (1) +#define MAX_SAMPLING_DOWN_FACTOR (10) + +static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); + +static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE +static +#endif +struct cpufreq_governor cpufreq_gov_conservative = { + .name = "conservative", + .governor = cs_cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + +static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, + struct cpufreq_policy *policy) +{ + unsigned int freq_target = (cs_tuners->freq_step * policy->max) / 100; + + /* max freq cannot be less than 100. But who knows... */ + if (unlikely(freq_target == 0)) + freq_target = DEF_FREQUENCY_STEP; + + return freq_target; +} + +/* + * Every sampling_rate, we check, if current idle time is less than 20% + * (default), then we try to increase frequency. Every sampling_rate * + * sampling_down_factor, we check, if current idle time is more than 80% + * (default), then we try to decrease frequency + * + * Any frequency increase takes it to the maximum frequency. Frequency reduction + * happens at minimum steps of 5% (default) of maximum frequency + */ +static void cs_check_cpu(int cpu, unsigned int load) +{ + struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); + struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; + struct dbs_data *dbs_data = policy->governor_data; + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + + /* + * break out if we 'cannot' reduce the speed as the user might + * want freq_step to be zero + */ + if (cs_tuners->freq_step == 0) + return; + + /* Check for frequency increase */ + if (load > cs_tuners->up_threshold) { + dbs_info->down_skip = 0; + + /* if we are already at full speed then break out early */ + if (dbs_info->requested_freq == policy->max) + return; + + dbs_info->requested_freq += get_freq_target(cs_tuners, policy); + + if (dbs_info->requested_freq > policy->max) + dbs_info->requested_freq = policy->max; + + __cpufreq_driver_target(policy, dbs_info->requested_freq, + CPUFREQ_RELATION_H); + return; + } + + /* if sampling_down_factor is active break out early */ + if (++dbs_info->down_skip < cs_tuners->sampling_down_factor) + return; + dbs_info->down_skip = 0; + + /* Check for frequency decrease */ + if (load < cs_tuners->down_threshold) { + unsigned int freq_target; + /* + * if we cannot reduce the frequency anymore, break out early + */ + if (policy->cur == policy->min) + return; + + freq_target = get_freq_target(cs_tuners, policy); + if (dbs_info->requested_freq > freq_target) + dbs_info->requested_freq -= freq_target; + else + dbs_info->requested_freq = policy->min; + + __cpufreq_driver_target(policy, dbs_info->requested_freq, + CPUFREQ_RELATION_L); + return; + } +} + +static unsigned int cs_dbs_timer(struct cpu_dbs_info *cdbs, + struct dbs_data *dbs_data, bool modify_all) +{ + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + + if (modify_all) + dbs_check_cpu(dbs_data, cdbs->shared->policy->cpu); + + return delay_for_sampling_rate(cs_tuners->sampling_rate); +} + +static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + struct cs_cpu_dbs_info_s *dbs_info = + &per_cpu(cs_cpu_dbs_info, freq->cpu); + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); + + if (!policy) + return 0; + + /* policy isn't governed by conservative governor */ + if (policy->governor != &cpufreq_gov_conservative) + return 0; + + /* + * we only care if our internally tracked freq moves outside the 'valid' + * ranges of frequency available to us otherwise we do not change it + */ + if (dbs_info->requested_freq > policy->max + || dbs_info->requested_freq < policy->min) + dbs_info->requested_freq = freq->new; + + return 0; +} + +static struct notifier_block cs_cpufreq_notifier_block = { + .notifier_call = dbs_cpufreq_notifier, +}; + +/************************** sysfs interface ************************/ +static struct common_dbs_data cs_dbs_cdata; + +static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, + const char *buf, size_t count) +{ + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + return -EINVAL; + + cs_tuners->sampling_down_factor = input; + return count; +} + +static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + cs_tuners->sampling_rate = max(input, dbs_data->min_sampling_rate); + return count; +} + +static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold) + return -EINVAL; + + cs_tuners->up_threshold = input; + return count; +} + +static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + /* cannot be lower than 1 otherwise freq will not fall */ + if (ret != 1 || input < 1 || input > 100 || + input >= cs_tuners->up_threshold) + return -EINVAL; + + cs_tuners->down_threshold = input; + return count; +} + +static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, + const char *buf, size_t count) +{ + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + unsigned int input, j; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 1) + input = 1; + + if (input == cs_tuners->ignore_nice_load) /* nothing to do */ + return count; + + cs_tuners->ignore_nice_load = input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct cs_cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(cs_cpu_dbs_info, j); + dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->cdbs.prev_cpu_wall, 0); + if (cs_tuners->ignore_nice_load) + dbs_info->cdbs.prev_cpu_nice = + kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + } + return count; +} + +static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + if (input > 100) + input = 100; + + /* + * no need to test here if freq_step is zero as the user might actually + * want this, they would be crazy though :) + */ + cs_tuners->freq_step = input; + return count; +} + +show_store_one(cs, sampling_rate); +show_store_one(cs, sampling_down_factor); +show_store_one(cs, up_threshold); +show_store_one(cs, down_threshold); +show_store_one(cs, ignore_nice_load); +show_store_one(cs, freq_step); +declare_show_sampling_rate_min(cs); + +gov_sys_pol_attr_rw(sampling_rate); +gov_sys_pol_attr_rw(sampling_down_factor); +gov_sys_pol_attr_rw(up_threshold); +gov_sys_pol_attr_rw(down_threshold); +gov_sys_pol_attr_rw(ignore_nice_load); +gov_sys_pol_attr_rw(freq_step); +gov_sys_pol_attr_ro(sampling_rate_min); + +static struct attribute *dbs_attributes_gov_sys[] = { + &sampling_rate_min_gov_sys.attr, + &sampling_rate_gov_sys.attr, + &sampling_down_factor_gov_sys.attr, + &up_threshold_gov_sys.attr, + &down_threshold_gov_sys.attr, + &ignore_nice_load_gov_sys.attr, + &freq_step_gov_sys.attr, + NULL +}; + +static struct attribute_group cs_attr_group_gov_sys = { + .attrs = dbs_attributes_gov_sys, + .name = "conservative", +}; + +static struct attribute *dbs_attributes_gov_pol[] = { + &sampling_rate_min_gov_pol.attr, + &sampling_rate_gov_pol.attr, + &sampling_down_factor_gov_pol.attr, + &up_threshold_gov_pol.attr, + &down_threshold_gov_pol.attr, + &ignore_nice_load_gov_pol.attr, + &freq_step_gov_pol.attr, + NULL +}; + +static struct attribute_group cs_attr_group_gov_pol = { + .attrs = dbs_attributes_gov_pol, + .name = "conservative", +}; + +/************************** sysfs end ************************/ + +static int cs_init(struct dbs_data *dbs_data, bool notify) +{ + struct cs_dbs_tuners *tuners; + + tuners = kzalloc(sizeof(*tuners), GFP_KERNEL); + if (!tuners) { + pr_err("%s: kzalloc failed\n", __func__); + return -ENOMEM; + } + + tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; + tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD; + tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; + tuners->ignore_nice_load = 0; + tuners->freq_step = DEF_FREQUENCY_STEP; + + dbs_data->tuners = tuners; + dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * + jiffies_to_usecs(10); + + if (notify) + cpufreq_register_notifier(&cs_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + + return 0; +} + +static void cs_exit(struct dbs_data *dbs_data, bool notify) +{ + if (notify) + cpufreq_unregister_notifier(&cs_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + + kfree(dbs_data->tuners); +} + +define_get_cpu_dbs_routines(cs_cpu_dbs_info); + +static struct common_dbs_data cs_dbs_cdata = { + .governor = GOV_CONSERVATIVE, + .attr_group_gov_sys = &cs_attr_group_gov_sys, + .attr_group_gov_pol = &cs_attr_group_gov_pol, + .get_cpu_cdbs = get_cpu_cdbs, + .get_cpu_dbs_info_s = get_cpu_dbs_info_s, + .gov_dbs_timer = cs_dbs_timer, + .gov_check_cpu = cs_check_cpu, + .init = cs_init, + .exit = cs_exit, + .mutex = __MUTEX_INITIALIZER(cs_dbs_cdata.mutex), +}; + +static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event); +} + +static int __init cpufreq_gov_dbs_init(void) +{ + return cpufreq_register_governor(&cpufreq_gov_conservative); +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_conservative); +} + +MODULE_AUTHOR("Alexander Clouter "); +MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " + "Low Latency Frequency Transition capable processors " + "optimised for use in a battery environment"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE +fs_initcall(cpufreq_gov_dbs_init); +#else +module_init(cpufreq_gov_dbs_init); +#endif +module_exit(cpufreq_gov_dbs_exit); diff --git a/src/cpufreq/4.x/cpufreq_governor.h b/src/cpufreq/4.x/cpufreq_governor.h new file mode 100644 index 000000000..f7b340c27 --- /dev/null +++ b/src/cpufreq/4.x/cpufreq_governor.h @@ -0,0 +1,281 @@ +/* + * drivers/cpufreq/cpufreq_governor.h + * + * Header file for CPUFreq governors common code + * + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . + * (C) 2003 Jun Nakajima + * (C) 2009 Alexander Clouter + * (c) 2012 Viresh Kumar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _CPUFREQ_GOVERNOR_H +#define _CPUFREQ_GOVERNOR_H + +#include +#include +#include +#include + +/* + * The polling frequency depends on the capability of the processor. Default + * polling frequency is 1000 times the transition latency of the processor. The + * governor will work on any processor with transition latency <= 10ms, using + * appropriate sampling rate. + * + * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL) + * this governor will not work. All times here are in us (micro seconds). + */ +#define MIN_SAMPLING_RATE_RATIO (2) +#define LATENCY_MULTIPLIER (1000) +#define MIN_LATENCY_MULTIPLIER (20) +#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) + +/* Ondemand Sampling types */ +enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; + +/* + * Macro for creating governors sysfs routines + * + * - gov_sys: One governor instance per whole system + * - gov_pol: One governor instance per policy + */ + +/* Create attributes */ +#define gov_sys_attr_ro(_name) \ +static struct kobj_attribute _name##_gov_sys = \ +__ATTR(_name, 0444, show_##_name##_gov_sys, NULL) + +#define gov_sys_attr_rw(_name) \ +static struct kobj_attribute _name##_gov_sys = \ +__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys) + +#define gov_pol_attr_ro(_name) \ +static struct freq_attr _name##_gov_pol = \ +__ATTR(_name, 0444, show_##_name##_gov_pol, NULL) + +#define gov_pol_attr_rw(_name) \ +static struct freq_attr _name##_gov_pol = \ +__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol) + +#define gov_sys_pol_attr_rw(_name) \ + gov_sys_attr_rw(_name); \ + gov_pol_attr_rw(_name) + +#define gov_sys_pol_attr_ro(_name) \ + gov_sys_attr_ro(_name); \ + gov_pol_attr_ro(_name) + +/* Create show/store routines */ +#define show_one(_gov, file_name) \ +static ssize_t show_##file_name##_gov_sys \ +(struct kobject *kobj, struct kobj_attribute *attr, char *buf) \ +{ \ + struct _gov##_dbs_tuners *tuners = _gov##_dbs_cdata.gdbs_data->tuners; \ + return sprintf(buf, "%u\n", tuners->file_name); \ +} \ + \ +static ssize_t show_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, char *buf) \ +{ \ + struct dbs_data *dbs_data = policy->governor_data; \ + struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \ + return sprintf(buf, "%u\n", tuners->file_name); \ +} + +#define store_one(_gov, file_name) \ +static ssize_t store_##file_name##_gov_sys \ +(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) \ +{ \ + struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ + return store_##file_name(dbs_data, buf, count); \ +} \ + \ +static ssize_t store_##file_name##_gov_pol \ +(struct cpufreq_policy *policy, const char *buf, size_t count) \ +{ \ + struct dbs_data *dbs_data = policy->governor_data; \ + return store_##file_name(dbs_data, buf, count); \ +} + +#define show_store_one(_gov, file_name) \ +show_one(_gov, file_name); \ +store_one(_gov, file_name) + +/* create helper routines */ +#define define_get_cpu_dbs_routines(_dbs_info) \ +static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ +{ \ + return &per_cpu(_dbs_info, cpu).cdbs; \ +} \ + \ +static void *get_cpu_dbs_info_s(int cpu) \ +{ \ + return &per_cpu(_dbs_info, cpu); \ +} + +/* + * Abbreviations: + * dbs: used as a shortform for demand based switching It helps to keep variable + * names smaller, simpler + * cdbs: common dbs + * od_*: On-demand governor + * cs_*: Conservative governor + */ + +/* Common to all CPUs of a policy */ +struct cpu_common_dbs_info { + struct cpufreq_policy *policy; + /* + * percpu mutex that serializes governor limit change with dbs_timer + * invocation. We do not want dbs_timer to run when user is changing + * the governor or limits. + */ + struct mutex timer_mutex; + ktime_t time_stamp; +}; + +/* Per cpu structures */ +struct cpu_dbs_info { + u64 prev_cpu_idle; + u64 prev_cpu_wall; + u64 prev_cpu_nice; + /* + * Used to keep track of load in the previous interval. However, when + * explicitly set to zero, it is used as a flag to ensure that we copy + * the previous load to the current interval only once, upon the first + * wake-up from idle. + */ + unsigned int prev_load; + struct delayed_work dwork; + struct cpu_common_dbs_info *shared; +}; + +struct od_cpu_dbs_info_s { + struct cpu_dbs_info cdbs; + struct cpufreq_frequency_table *freq_table; + unsigned int freq_lo; + unsigned int freq_lo_jiffies; + unsigned int freq_hi_jiffies; + unsigned int rate_mult; + unsigned int sample_type:1; +}; + +struct cs_cpu_dbs_info_s { + struct cpu_dbs_info cdbs; + unsigned int down_skip; + unsigned int requested_freq; +}; + +/* Per policy Governors sysfs tunables */ +struct od_dbs_tuners { + unsigned int ignore_nice_load; + unsigned int sampling_rate; + unsigned int sampling_down_factor; + unsigned int up_threshold; + unsigned int powersave_bias; + unsigned int io_is_busy; +}; + +struct cs_dbs_tuners { + unsigned int ignore_nice_load; + unsigned int sampling_rate; + unsigned int sampling_down_factor; + unsigned int up_threshold; + unsigned int down_threshold; + unsigned int freq_step; +}; + +/* Common Governor data across policies */ +struct dbs_data; +struct common_dbs_data { + /* Common across governors */ + #define GOV_ONDEMAND 0 + #define GOV_CONSERVATIVE 1 + int governor; + struct attribute_group *attr_group_gov_sys; /* one governor - system */ + struct attribute_group *attr_group_gov_pol; /* one governor - policy */ + + /* + * Common data for platforms that don't set + * CPUFREQ_HAVE_GOVERNOR_PER_POLICY + */ + struct dbs_data *gdbs_data; + + struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); + void *(*get_cpu_dbs_info_s)(int cpu); + unsigned int (*gov_dbs_timer)(struct cpu_dbs_info *cdbs, + struct dbs_data *dbs_data, + bool modify_all); + void (*gov_check_cpu)(int cpu, unsigned int load); + int (*init)(struct dbs_data *dbs_data, bool notify); + void (*exit)(struct dbs_data *dbs_data, bool notify); + + /* Governor specific ops, see below */ + void *gov_ops; + + /* + * Protects governor's data (struct dbs_data and struct common_dbs_data) + */ + struct mutex mutex; +}; + +/* Governor Per policy data */ +struct dbs_data { + struct common_dbs_data *cdata; + unsigned int min_sampling_rate; + int usage_count; + void *tuners; +}; + +/* Governor specific ops, will be passed to dbs_data->gov_ops */ +struct od_ops { + void (*powersave_bias_init_cpu)(int cpu); + unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, + unsigned int freq_next, unsigned int relation); + void (*freq_increase)(struct cpufreq_policy *policy, unsigned int freq); +}; + +static inline int delay_for_sampling_rate(unsigned int sampling_rate) +{ + int delay = usecs_to_jiffies(sampling_rate); + + /* We want all CPUs to do sampling nearly on same jiffy */ + if (num_online_cpus() > 1) + delay -= jiffies % delay; + + return delay; +} + +#define declare_show_sampling_rate_min(_gov) \ +static ssize_t show_sampling_rate_min_gov_sys \ +(struct kobject *kobj, struct kobj_attribute *attr, char *buf) \ +{ \ + struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data; \ + return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ +} \ + \ +static ssize_t show_sampling_rate_min_gov_pol \ +(struct cpufreq_policy *policy, char *buf) \ +{ \ + struct dbs_data *dbs_data = policy->governor_data; \ + return sprintf(buf, "%u\n", dbs_data->min_sampling_rate); \ +} + +extern struct mutex cpufreq_governor_lock; + +void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); +int cpufreq_governor_dbs(struct cpufreq_policy *policy, + struct common_dbs_data *cdata, unsigned int event); +void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, + unsigned int delay, bool all_cpus); +void od_register_powersave_bias_handler(unsigned int (*f) + (struct cpufreq_policy *, unsigned int, unsigned int), + unsigned int powersave_bias); +void od_unregister_powersave_bias_handler(void); +#endif /* _CPUFREQ_GOVERNOR_H */ diff --git a/src/cpufreq/4.x/cpufreq_ondemand.c b/src/cpufreq/4.x/cpufreq_ondemand.c new file mode 100644 index 000000000..03ac6ce54 --- /dev/null +++ b/src/cpufreq/4.x/cpufreq_ondemand.c @@ -0,0 +1,620 @@ +/* + * drivers/cpufreq/cpufreq_ondemand.c + * + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . + * Jun Nakajima + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include "cpufreq_governor.h" + +/* On-demand governor macros */ +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_SAMPLING_DOWN_FACTOR (1) +#define MAX_SAMPLING_DOWN_FACTOR (100000) +#define MICRO_FREQUENCY_UP_THRESHOLD (95) +#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) +#define MIN_FREQUENCY_UP_THRESHOLD (11) +#define MAX_FREQUENCY_UP_THRESHOLD (100) + +static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); + +static struct od_ops od_ops; + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +static struct cpufreq_governor cpufreq_gov_ondemand; +#endif + +static unsigned int default_powersave_bias; + +static void ondemand_powersave_bias_init_cpu(int cpu) +{ + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + + dbs_info->freq_table = cpufreq_frequency_get_table(cpu); + dbs_info->freq_lo = 0; +} + +/* + * Not all CPUs want IO time to be accounted as busy; this depends on how + * efficient idling at a higher frequency/voltage is. + * Pavel Machek says this is not so for various generations of AMD and old + * Intel systems. + * Mike Chan (android.com) claims this is also not true for ARM. + * Because of this, whitelist specific known (series) of CPUs by default, and + * leave all others up to the user. + */ +static int should_io_be_busy(void) +{ +#if defined(CONFIG_X86) + /* + * For Intel, Core 2 (model 15) and later have an efficient idle. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model >= 15) + return 1; +#endif + return 0; +} + +/* + * Find right freq to be set now with powersave_bias on. + * Returns the freq_hi to be used right now and will set freq_hi_jiffies, + * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. + */ +static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, + unsigned int freq_next, unsigned int relation) +{ + unsigned int freq_req, freq_reduc, freq_avg; + unsigned int freq_hi, freq_lo; + unsigned int index = 0; + unsigned int jiffies_total, jiffies_hi, jiffies_lo; + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, + policy->cpu); + struct dbs_data *dbs_data = policy->governor_data; + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + + if (!dbs_info->freq_table) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_next; + } + + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, + relation, &index); + freq_req = dbs_info->freq_table[index].frequency; + freq_reduc = freq_req * od_tuners->powersave_bias / 1000; + freq_avg = freq_req - freq_reduc; + + /* Find freq bounds for freq_avg in freq_table */ + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_H, &index); + freq_lo = dbs_info->freq_table[index].frequency; + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_L, &index); + freq_hi = dbs_info->freq_table[index].frequency; + + /* Find out how long we have to be in hi and lo freqs */ + if (freq_hi == freq_lo) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_lo; + } + jiffies_total = usecs_to_jiffies(od_tuners->sampling_rate); + jiffies_hi = (freq_avg - freq_lo) * jiffies_total; + jiffies_hi += ((freq_hi - freq_lo) / 2); + jiffies_hi /= (freq_hi - freq_lo); + jiffies_lo = jiffies_total - jiffies_hi; + dbs_info->freq_lo = freq_lo; + dbs_info->freq_lo_jiffies = jiffies_lo; + dbs_info->freq_hi_jiffies = jiffies_hi; + return freq_hi; +} + +static void ondemand_powersave_bias_init(void) +{ + int i; + for_each_online_cpu(i) { + ondemand_powersave_bias_init_cpu(i); + } +} + +static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) +{ + struct dbs_data *dbs_data = policy->governor_data; + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + + if (od_tuners->powersave_bias) + freq = od_ops.powersave_bias_target(policy, freq, + CPUFREQ_RELATION_H); + else if (policy->cur == policy->max) + return; + + __cpufreq_driver_target(policy, freq, od_tuners->powersave_bias ? + CPUFREQ_RELATION_L : CPUFREQ_RELATION_H); +} + +/* + * Every sampling_rate, we check, if current idle time is less than 20% + * (default), then we try to increase frequency. Else, we adjust the frequency + * proportional to load. + */ +static void od_check_cpu(int cpu, unsigned int load) +{ + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; + struct dbs_data *dbs_data = policy->governor_data; + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + + dbs_info->freq_lo = 0; + + /* Check for frequency increase */ + if (load > od_tuners->up_threshold) { + /* If switching to max speed, apply sampling_down_factor */ + if (policy->cur < policy->max) + dbs_info->rate_mult = + od_tuners->sampling_down_factor; + dbs_freq_increase(policy, policy->max); + } else { + /* Calculate the next frequency proportional to load */ + unsigned int freq_next, min_f, max_f; + + min_f = policy->cpuinfo.min_freq; + max_f = policy->cpuinfo.max_freq; + freq_next = min_f + load * (max_f - min_f) / 100; + + /* No longer fully busy, reset rate_mult */ + dbs_info->rate_mult = 1; + + if (!od_tuners->powersave_bias) { + __cpufreq_driver_target(policy, freq_next, + CPUFREQ_RELATION_C); + return; + } + + freq_next = od_ops.powersave_bias_target(policy, freq_next, + CPUFREQ_RELATION_L); + __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C); + } +} + +static unsigned int od_dbs_timer(struct cpu_dbs_info *cdbs, + struct dbs_data *dbs_data, bool modify_all) +{ + struct cpufreq_policy *policy = cdbs->shared->policy; + unsigned int cpu = policy->cpu; + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, + cpu); + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + int delay = 0, sample_type = dbs_info->sample_type; + + if (!modify_all) + goto max_delay; + + /* Common NORMAL_SAMPLE setup */ + dbs_info->sample_type = OD_NORMAL_SAMPLE; + if (sample_type == OD_SUB_SAMPLE) { + delay = dbs_info->freq_lo_jiffies; + __cpufreq_driver_target(policy, dbs_info->freq_lo, + CPUFREQ_RELATION_H); + } else { + dbs_check_cpu(dbs_data, cpu); + if (dbs_info->freq_lo) { + /* Setup timer for SUB_SAMPLE */ + dbs_info->sample_type = OD_SUB_SAMPLE; + delay = dbs_info->freq_hi_jiffies; + } + } + +max_delay: + if (!delay) + delay = delay_for_sampling_rate(od_tuners->sampling_rate + * dbs_info->rate_mult); + + return delay; +} + +/************************** sysfs interface ************************/ +static struct common_dbs_data od_dbs_cdata; + +/** + * update_sampling_rate - update sampling rate effective immediately if needed. + * @new_rate: new sampling rate + * + * If new rate is smaller than the old, simply updating + * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the + * original sampling_rate was 1 second and the requested new sampling rate is 10 + * ms because the user needs immediate reaction from ondemand governor, but not + * sure if higher frequency will be required or not, then, the governor may + * change the sampling rate too late; up to 1 second later. Thus, if we are + * reducing the sampling rate, we need to make the new value effective + * immediately. + */ +static void update_sampling_rate(struct dbs_data *dbs_data, + unsigned int new_rate) +{ + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + int cpu; + + od_tuners->sampling_rate = new_rate = max(new_rate, + dbs_data->min_sampling_rate); + + for_each_online_cpu(cpu) { + struct cpufreq_policy *policy; + struct od_cpu_dbs_info_s *dbs_info; + unsigned long next_sampling, appointed_at; + + policy = cpufreq_cpu_get(cpu); + if (!policy) + continue; + if (policy->governor != &cpufreq_gov_ondemand) { + cpufreq_cpu_put(policy); + continue; + } + dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + cpufreq_cpu_put(policy); + + if (!delayed_work_pending(&dbs_info->cdbs.dwork)) + continue; + + next_sampling = jiffies + usecs_to_jiffies(new_rate); + appointed_at = dbs_info->cdbs.dwork.timer.expires; + + if (time_before(next_sampling, appointed_at)) { + cancel_delayed_work_sync(&dbs_info->cdbs.dwork); + + gov_queue_work(dbs_data, policy, + usecs_to_jiffies(new_rate), true); + + } + } +} + +static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + update_sampling_rate(dbs_data, input); + return count; +} + +static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + unsigned int input; + int ret; + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + od_tuners->io_is_busy = !!input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, + j); + dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); + } + return count; +} + +static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || + input < MIN_FREQUENCY_UP_THRESHOLD) { + return -EINVAL; + } + + od_tuners->up_threshold = input; + return count; +} + +static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data, + const char *buf, size_t count) +{ + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + unsigned int input, j; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + return -EINVAL; + od_tuners->sampling_down_factor = input; + + /* Reset down sampling multiplier in case it was active */ + for_each_online_cpu(j) { + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, + j); + dbs_info->rate_mult = 1; + } + return count; +} + +static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data, + const char *buf, size_t count) +{ + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + unsigned int input; + int ret; + + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 1) + input = 1; + + if (input == od_tuners->ignore_nice_load) { /* nothing to do */ + return count; + } + od_tuners->ignore_nice_load = input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct od_cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(od_cpu_dbs_info, j); + dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy); + if (od_tuners->ignore_nice_load) + dbs_info->cdbs.prev_cpu_nice = + kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + + } + return count; +} + +static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf, + size_t count) +{ + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + if (input > 1000) + input = 1000; + + od_tuners->powersave_bias = input; + ondemand_powersave_bias_init(); + return count; +} + +show_store_one(od, sampling_rate); +show_store_one(od, io_is_busy); +show_store_one(od, up_threshold); +show_store_one(od, sampling_down_factor); +show_store_one(od, ignore_nice_load); +show_store_one(od, powersave_bias); +declare_show_sampling_rate_min(od); + +gov_sys_pol_attr_rw(sampling_rate); +gov_sys_pol_attr_rw(io_is_busy); +gov_sys_pol_attr_rw(up_threshold); +gov_sys_pol_attr_rw(sampling_down_factor); +gov_sys_pol_attr_rw(ignore_nice_load); +gov_sys_pol_attr_rw(powersave_bias); +gov_sys_pol_attr_ro(sampling_rate_min); + +static struct attribute *dbs_attributes_gov_sys[] = { + &sampling_rate_min_gov_sys.attr, + &sampling_rate_gov_sys.attr, + &up_threshold_gov_sys.attr, + &sampling_down_factor_gov_sys.attr, + &ignore_nice_load_gov_sys.attr, + &powersave_bias_gov_sys.attr, + &io_is_busy_gov_sys.attr, + NULL +}; + +static struct attribute_group od_attr_group_gov_sys = { + .attrs = dbs_attributes_gov_sys, + .name = "ondemand", +}; + +static struct attribute *dbs_attributes_gov_pol[] = { + &sampling_rate_min_gov_pol.attr, + &sampling_rate_gov_pol.attr, + &up_threshold_gov_pol.attr, + &sampling_down_factor_gov_pol.attr, + &ignore_nice_load_gov_pol.attr, + &powersave_bias_gov_pol.attr, + &io_is_busy_gov_pol.attr, + NULL +}; + +static struct attribute_group od_attr_group_gov_pol = { + .attrs = dbs_attributes_gov_pol, + .name = "ondemand", +}; + +/************************** sysfs end ************************/ + +static int od_init(struct dbs_data *dbs_data, bool notify) +{ + struct od_dbs_tuners *tuners; + u64 idle_time; + int cpu; + + tuners = kzalloc(sizeof(*tuners), GFP_KERNEL); + if (!tuners) { + pr_err("%s: kzalloc failed\n", __func__); + return -ENOMEM; + } + + cpu = get_cpu(); + idle_time = get_cpu_idle_time_us(cpu, NULL); + put_cpu(); + if (idle_time != -1ULL) { + /* Idle micro accounting is supported. Use finer thresholds */ + tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; + /* + * In nohz/micro accounting case we set the minimum frequency + * not depending on HZ, but fixed (very low). The deferred + * timer might skip some samples if idle/sleeping as needed. + */ + dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; + } else { + tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD; + + /* For correct statistics, we need 10 ticks for each measure */ + dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * + jiffies_to_usecs(10); + } + + tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR; + tuners->ignore_nice_load = 0; + tuners->powersave_bias = default_powersave_bias; + tuners->io_is_busy = should_io_be_busy(); + + dbs_data->tuners = tuners; + return 0; +} + +static void od_exit(struct dbs_data *dbs_data, bool notify) +{ + kfree(dbs_data->tuners); +} + +define_get_cpu_dbs_routines(od_cpu_dbs_info); + +static struct od_ops od_ops = { + .powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu, + .powersave_bias_target = generic_powersave_bias_target, + .freq_increase = dbs_freq_increase, +}; + +static struct common_dbs_data od_dbs_cdata = { + .governor = GOV_ONDEMAND, + .attr_group_gov_sys = &od_attr_group_gov_sys, + .attr_group_gov_pol = &od_attr_group_gov_pol, + .get_cpu_cdbs = get_cpu_cdbs, + .get_cpu_dbs_info_s = get_cpu_dbs_info_s, + .gov_dbs_timer = od_dbs_timer, + .gov_check_cpu = od_check_cpu, + .gov_ops = &od_ops, + .init = od_init, + .exit = od_exit, + .mutex = __MUTEX_INITIALIZER(od_dbs_cdata.mutex), +}; + +static void od_set_powersave_bias(unsigned int powersave_bias) +{ + struct cpufreq_policy *policy; + struct dbs_data *dbs_data; + struct od_dbs_tuners *od_tuners; + unsigned int cpu; + cpumask_t done; + + default_powersave_bias = powersave_bias; + cpumask_clear(&done); + + get_online_cpus(); + for_each_online_cpu(cpu) { + struct cpu_common_dbs_info *shared; + + if (cpumask_test_cpu(cpu, &done)) + continue; + + shared = per_cpu(od_cpu_dbs_info, cpu).cdbs.shared; + if (!shared) + continue; + + policy = shared->policy; + cpumask_or(&done, &done, policy->cpus); + + if (policy->governor != &cpufreq_gov_ondemand) + continue; + + dbs_data = policy->governor_data; + od_tuners = dbs_data->tuners; + od_tuners->powersave_bias = default_powersave_bias; + } + put_online_cpus(); +} + +void od_register_powersave_bias_handler(unsigned int (*f) + (struct cpufreq_policy *, unsigned int, unsigned int), + unsigned int powersave_bias) +{ + od_ops.powersave_bias_target = f; + od_set_powersave_bias(powersave_bias); +} +EXPORT_SYMBOL_GPL(od_register_powersave_bias_handler); + +void od_unregister_powersave_bias_handler(void) +{ + od_ops.powersave_bias_target = generic_powersave_bias_target; + od_set_powersave_bias(0); +} +EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler); + +static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + return cpufreq_governor_dbs(policy, &od_dbs_cdata, event); +} + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +static +#endif +struct cpufreq_governor cpufreq_gov_ondemand = { + .name = "ondemand", + .governor = od_cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + +static int __init cpufreq_gov_dbs_init(void) +{ + return cpufreq_register_governor(&cpufreq_gov_ondemand); +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_ondemand); +} + +MODULE_AUTHOR("Venkatesh Pallipadi "); +MODULE_AUTHOR("Alexey Starikovskiy "); +MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " + "Low Latency Frequency Transition capable processors"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +fs_initcall(cpufreq_gov_dbs_init); +#else +module_init(cpufreq_gov_dbs_init); +#endif +module_exit(cpufreq_gov_dbs_exit); diff --git a/src/cpufreq/4.x/cpufreq_powersave.c b/src/cpufreq/4.x/cpufreq_powersave.c new file mode 100644 index 000000000..e3b874c23 --- /dev/null +++ b/src/cpufreq/4.x/cpufreq_powersave.c @@ -0,0 +1,64 @@ +/* + * linux/drivers/cpufreq/cpufreq_powersave.c + * + * Copyright (C) 2002 - 2003 Dominik Brodowski + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +static int cpufreq_governor_powersave(struct cpufreq_policy *policy, + unsigned int event) +{ + switch (event) { + case CPUFREQ_GOV_START: + case CPUFREQ_GOV_LIMITS: + pr_debug("setting to %u kHz because of event %u\n", + policy->min, event); + __cpufreq_driver_target(policy, policy->min, + CPUFREQ_RELATION_L); + break; + default: + break; + } + return 0; +} + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE +static +#endif +struct cpufreq_governor cpufreq_gov_powersave = { + .name = "powersave", + .governor = cpufreq_governor_powersave, + .owner = THIS_MODULE, +}; + +static int __init cpufreq_gov_powersave_init(void) +{ + return cpufreq_register_governor(&cpufreq_gov_powersave); +} + +static void __exit cpufreq_gov_powersave_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_powersave); +} + +MODULE_AUTHOR("Dominik Brodowski "); +MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE +fs_initcall(cpufreq_gov_powersave_init); +#else +module_init(cpufreq_gov_powersave_init); +#endif +module_exit(cpufreq_gov_powersave_exit); diff --git a/src/cpufreq/4.x/cpufreq_userspace.c b/src/cpufreq/4.x/cpufreq_userspace.c new file mode 100644 index 000000000..9cc8abd3d --- /dev/null +++ b/src/cpufreq/4.x/cpufreq_userspace.c @@ -0,0 +1,156 @@ + +/* + * linux/drivers/cpufreq/cpufreq_userspace.c + * + * Copyright (C) 2001 Russell King + * (C) 2002 - 2004 Dominik Brodowski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +static DEFINE_PER_CPU(unsigned int, cpu_is_managed); +static DEFINE_MUTEX(userspace_mutex); + +/** + * cpufreq_set - set the CPU frequency + * @policy: pointer to policy struct where freq is being set + * @freq: target frequency in kHz + * + * Sets the CPU frequency to freq. + */ +static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq) +{ + int ret = -EINVAL; + unsigned int *setspeed = policy->governor_data; + + pr_debug("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq); + + mutex_lock(&userspace_mutex); + if (!per_cpu(cpu_is_managed, policy->cpu)) + goto err; + + *setspeed = freq; + + ret = __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L); + err: + mutex_unlock(&userspace_mutex); + return ret; +} + +static ssize_t show_speed(struct cpufreq_policy *policy, char *buf) +{ + return sprintf(buf, "%u\n", policy->cur); +} + +static int cpufreq_userspace_policy_init(struct cpufreq_policy *policy) +{ + unsigned int *setspeed; + + setspeed = kzalloc(sizeof(*setspeed), GFP_KERNEL); + if (!setspeed) + return -ENOMEM; + + policy->governor_data = setspeed; + return 0; +} + +static int cpufreq_governor_userspace(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int *setspeed = policy->governor_data; + unsigned int cpu = policy->cpu; + int rc = 0; + + if (event == CPUFREQ_GOV_POLICY_INIT) + return cpufreq_userspace_policy_init(policy); + + if (!setspeed) + return -EINVAL; + + switch (event) { + case CPUFREQ_GOV_POLICY_EXIT: + mutex_lock(&userspace_mutex); + policy->governor_data = NULL; + kfree(setspeed); + mutex_unlock(&userspace_mutex); + break; + case CPUFREQ_GOV_START: + BUG_ON(!policy->cur); + pr_debug("started managing cpu %u\n", cpu); + + mutex_lock(&userspace_mutex); + per_cpu(cpu_is_managed, cpu) = 1; + *setspeed = policy->cur; + mutex_unlock(&userspace_mutex); + break; + case CPUFREQ_GOV_STOP: + pr_debug("managing cpu %u stopped\n", cpu); + + mutex_lock(&userspace_mutex); + per_cpu(cpu_is_managed, cpu) = 0; + *setspeed = 0; + mutex_unlock(&userspace_mutex); + break; + case CPUFREQ_GOV_LIMITS: + mutex_lock(&userspace_mutex); + pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz, last set to %u kHz\n", + cpu, policy->min, policy->max, policy->cur, *setspeed); + + if (policy->max < *setspeed) + __cpufreq_driver_target(policy, policy->max, + CPUFREQ_RELATION_H); + else if (policy->min > *setspeed) + __cpufreq_driver_target(policy, policy->min, + CPUFREQ_RELATION_L); + else + __cpufreq_driver_target(policy, *setspeed, + CPUFREQ_RELATION_L); + mutex_unlock(&userspace_mutex); + break; + } + return rc; +} + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE +static +#endif +struct cpufreq_governor cpufreq_gov_userspace = { + .name = "userspace", + .governor = cpufreq_governor_userspace, + .store_setspeed = cpufreq_set, + .show_setspeed = show_speed, + .owner = THIS_MODULE, +}; + +static int __init cpufreq_gov_userspace_init(void) +{ + return cpufreq_register_governor(&cpufreq_gov_userspace); +} + +static void __exit cpufreq_gov_userspace_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_userspace); +} + +MODULE_AUTHOR("Dominik Brodowski , " + "Russell King "); +MODULE_DESCRIPTION("CPUfreq policy governor 'userspace'"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE +fs_initcall(cpufreq_gov_userspace_init); +#else +module_init(cpufreq_gov_userspace_init); +#endif +module_exit(cpufreq_gov_userspace_exit); diff --git a/src/cpufreq/4.x/intel_pstate.c b/src/cpufreq/4.x/intel_pstate.c new file mode 100644 index 000000000..15fcf2cac --- /dev/null +++ b/src/cpufreq/4.x/intel_pstate.c @@ -0,0 +1,1460 @@ +/* + * intel_pstate.c: Native P state management for Intel processors + * + * (C) Copyright 2012 Intel Corporation + * Author: Dirk Brandewie + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define ATOM_RATIOS 0x66a +#define ATOM_VIDS 0x66b +#define ATOM_TURBO_RATIOS 0x66c +#define ATOM_TURBO_VIDS 0x66d + +#define FRAC_BITS 8 +#define int_tofp(X) ((int64_t)(X) << FRAC_BITS) +#define fp_toint(X) ((X) >> FRAC_BITS) + +static inline int32_t mul_fp(int32_t x, int32_t y) +{ + return ((int64_t)x * (int64_t)y) >> FRAC_BITS; +} + +static inline int32_t div_fp(s64 x, s64 y) +{ + return div64_s64((int64_t)x << FRAC_BITS, y); +} + +static inline int ceiling_fp(int32_t x) +{ + int mask, ret; + + ret = fp_toint(x); + mask = (1 << FRAC_BITS) - 1; + if (x & mask) + ret += 1; + return ret; +} + +struct sample { + int32_t core_pct_busy; + u64 aperf; + u64 mperf; + u64 tsc; + int freq; + ktime_t time; +}; + +struct pstate_data { + int current_pstate; + int min_pstate; + int max_pstate; + int max_pstate_physical; + int scaling; + int turbo_pstate; +}; + +struct vid_data { + int min; + int max; + int turbo; + int32_t ratio; +}; + +struct _pid { + int setpoint; + int32_t integral; + int32_t p_gain; + int32_t i_gain; + int32_t d_gain; + int deadband; + int32_t last_err; +}; + +struct cpudata { + int cpu; + + struct timer_list timer; + + struct pstate_data pstate; + struct vid_data vid; + struct _pid pid; + + ktime_t last_sample_time; + u64 prev_aperf; + u64 prev_mperf; + u64 prev_tsc; + struct sample sample; +}; + +static struct cpudata **all_cpu_data; +struct pstate_adjust_policy { + int sample_rate_ms; + int deadband; + int setpoint; + int p_gain_pct; + int d_gain_pct; + int i_gain_pct; +}; + +struct pstate_funcs { + int (*get_max)(void); + int (*get_max_physical)(void); + int (*get_min)(void); + int (*get_turbo)(void); + int (*get_scaling)(void); + void (*set)(struct cpudata*, int pstate); + void (*get_vid)(struct cpudata *); +}; + +struct cpu_defaults { + struct pstate_adjust_policy pid_policy; + struct pstate_funcs funcs; +}; + +static struct pstate_adjust_policy pid_params; +static struct pstate_funcs pstate_funcs; +static int hwp_active; + +struct perf_limits { + int no_turbo; + int turbo_disabled; + int max_perf_pct; + int min_perf_pct; + int32_t max_perf; + int32_t min_perf; + int max_policy_pct; + int max_sysfs_pct; + int min_policy_pct; + int min_sysfs_pct; +}; + +static struct perf_limits performance_limits = { + .no_turbo = 0, + .turbo_disabled = 0, + .max_perf_pct = 100, + .max_perf = int_tofp(1), + .min_perf_pct = 100, + .min_perf = int_tofp(1), + .max_policy_pct = 100, + .max_sysfs_pct = 100, + .min_policy_pct = 0, + .min_sysfs_pct = 0, +}; + +static struct perf_limits powersave_limits = { + .no_turbo = 0, + .turbo_disabled = 0, + .max_perf_pct = 100, + .max_perf = int_tofp(1), + .min_perf_pct = 0, + .min_perf = 0, + .max_policy_pct = 100, + .max_sysfs_pct = 100, + .min_policy_pct = 0, + .min_sysfs_pct = 0, +}; + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE +static struct perf_limits *limits = &performance_limits; +#else +static struct perf_limits *limits = &powersave_limits; +#endif + +static inline void pid_reset(struct _pid *pid, int setpoint, int busy, + int deadband, int integral) { + pid->setpoint = setpoint; + pid->deadband = deadband; + pid->integral = int_tofp(integral); + pid->last_err = int_tofp(setpoint) - int_tofp(busy); +} + +static inline void pid_p_gain_set(struct _pid *pid, int percent) +{ + pid->p_gain = div_fp(int_tofp(percent), int_tofp(100)); +} + +static inline void pid_i_gain_set(struct _pid *pid, int percent) +{ + pid->i_gain = div_fp(int_tofp(percent), int_tofp(100)); +} + +static inline void pid_d_gain_set(struct _pid *pid, int percent) +{ + pid->d_gain = div_fp(int_tofp(percent), int_tofp(100)); +} + +static signed int pid_calc(struct _pid *pid, int32_t busy) +{ + signed int result; + int32_t pterm, dterm, fp_error; + int32_t integral_limit; + + fp_error = int_tofp(pid->setpoint) - busy; + + if (abs(fp_error) <= int_tofp(pid->deadband)) + return 0; + + pterm = mul_fp(pid->p_gain, fp_error); + + pid->integral += fp_error; + + /* + * We limit the integral here so that it will never + * get higher than 30. This prevents it from becoming + * too large an input over long periods of time and allows + * it to get factored out sooner. + * + * The value of 30 was chosen through experimentation. + */ + integral_limit = int_tofp(30); + if (pid->integral > integral_limit) + pid->integral = integral_limit; + if (pid->integral < -integral_limit) + pid->integral = -integral_limit; + + dterm = mul_fp(pid->d_gain, fp_error - pid->last_err); + pid->last_err = fp_error; + + result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm; + result = result + (1 << (FRAC_BITS-1)); + return (signed int)fp_toint(result); +} + +static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu) +{ + pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct); + pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct); + pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct); + + pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0); +} + +static inline void intel_pstate_reset_all_pid(void) +{ + unsigned int cpu; + + for_each_online_cpu(cpu) { + if (all_cpu_data[cpu]) + intel_pstate_busy_pid_reset(all_cpu_data[cpu]); + } +} + +static inline void update_turbo_state(void) +{ + u64 misc_en; + struct cpudata *cpu; + + cpu = all_cpu_data[0]; + rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); + limits->turbo_disabled = + (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE || + cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); +} + +static void intel_pstate_hwp_set(void) +{ + int min, hw_min, max, hw_max, cpu, range, adj_range; + u64 value, cap; + + get_online_cpus(); + + for_each_online_cpu(cpu) { + rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); + hw_min = HWP_LOWEST_PERF(cap); + hw_max = HWP_HIGHEST_PERF(cap); + range = hw_max - hw_min; + + rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); + adj_range = limits->min_perf_pct * range / 100; + min = hw_min + adj_range; + value &= ~HWP_MIN_PERF(~0L); + value |= HWP_MIN_PERF(min); + + adj_range = limits->max_perf_pct * range / 100; + max = hw_min + adj_range; + if (limits->no_turbo) { + hw_max = HWP_GUARANTEED_PERF(cap); + if (hw_max < max) + max = hw_max; + } + + value &= ~HWP_MAX_PERF(~0L); + value |= HWP_MAX_PERF(max); + wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); + } + + put_online_cpus(); +} + +/************************** debugfs begin ************************/ +static int pid_param_set(void *data, u64 val) +{ + *(u32 *)data = val; + intel_pstate_reset_all_pid(); + return 0; +} + +static int pid_param_get(void *data, u64 *val) +{ + *val = *(u32 *)data; + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n"); + +struct pid_param { + char *name; + void *value; +}; + +static struct pid_param pid_files[] = { + {"sample_rate_ms", &pid_params.sample_rate_ms}, + {"d_gain_pct", &pid_params.d_gain_pct}, + {"i_gain_pct", &pid_params.i_gain_pct}, + {"deadband", &pid_params.deadband}, + {"setpoint", &pid_params.setpoint}, + {"p_gain_pct", &pid_params.p_gain_pct}, + {NULL, NULL} +}; + +static void __init intel_pstate_debug_expose_params(void) +{ + struct dentry *debugfs_parent; + int i = 0; + + if (hwp_active) + return; + debugfs_parent = debugfs_create_dir("pstate_snb", NULL); + if (IS_ERR_OR_NULL(debugfs_parent)) + return; + while (pid_files[i].name) { + debugfs_create_file(pid_files[i].name, 0660, + debugfs_parent, pid_files[i].value, + &fops_pid_param); + i++; + } +} + +/************************** debugfs end ************************/ + +/************************** sysfs begin ************************/ +#define show_one(file_name, object) \ + static ssize_t show_##file_name \ + (struct kobject *kobj, struct kobj_attribute *attr, char *buf) \ + { \ + return sprintf(buf, "%u\n", limits->object); \ + } + +static ssize_t show_turbo_pct(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct cpudata *cpu; + int total, no_turbo, turbo_pct; + uint32_t turbo_fp; + + cpu = all_cpu_data[0]; + + total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1; + no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1; + turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total)); + turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100))); + return sprintf(buf, "%u\n", turbo_pct); +} + +static ssize_t show_num_pstates(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct cpudata *cpu; + int total; + + cpu = all_cpu_data[0]; + total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1; + return sprintf(buf, "%u\n", total); +} + +static ssize_t show_no_turbo(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + ssize_t ret; + + update_turbo_state(); + if (limits->turbo_disabled) + ret = sprintf(buf, "%u\n", limits->turbo_disabled); + else + ret = sprintf(buf, "%u\n", limits->no_turbo); + + return ret; +} + +static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + update_turbo_state(); + if (limits->turbo_disabled) { + pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n"); + return -EPERM; + } + + limits->no_turbo = clamp_t(int, input, 0, 1); + + if (hwp_active) + intel_pstate_hwp_set(); + + return count; +} + +static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + limits->max_sysfs_pct = clamp_t(int, input, 0 , 100); + limits->max_perf_pct = min(limits->max_policy_pct, + limits->max_sysfs_pct); + limits->max_perf_pct = max(limits->min_policy_pct, + limits->max_perf_pct); + limits->max_perf_pct = max(limits->min_perf_pct, + limits->max_perf_pct); + limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), + int_tofp(100)); + + if (hwp_active) + intel_pstate_hwp_set(); + return count; +} + +static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + limits->min_sysfs_pct = clamp_t(int, input, 0 , 100); + limits->min_perf_pct = max(limits->min_policy_pct, + limits->min_sysfs_pct); + limits->min_perf_pct = min(limits->max_policy_pct, + limits->min_perf_pct); + limits->min_perf_pct = min(limits->max_perf_pct, + limits->min_perf_pct); + limits->min_perf = div_fp(int_tofp(limits->min_perf_pct), + int_tofp(100)); + + if (hwp_active) + intel_pstate_hwp_set(); + return count; +} + +show_one(max_perf_pct, max_perf_pct); +show_one(min_perf_pct, min_perf_pct); + +define_one_global_rw(no_turbo); +define_one_global_rw(max_perf_pct); +define_one_global_rw(min_perf_pct); +define_one_global_ro(turbo_pct); +define_one_global_ro(num_pstates); + +static struct attribute *intel_pstate_attributes[] = { + &no_turbo.attr, + &max_perf_pct.attr, + &min_perf_pct.attr, + &turbo_pct.attr, + &num_pstates.attr, + NULL +}; + +static struct attribute_group intel_pstate_attr_group = { + .attrs = intel_pstate_attributes, +}; + +static void __init intel_pstate_sysfs_expose_params(void) +{ + struct kobject *intel_pstate_kobject; + int rc; + + intel_pstate_kobject = kobject_create_and_add("intel_pstate", + &cpu_subsys.dev_root->kobj); + BUG_ON(!intel_pstate_kobject); + rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group); + BUG_ON(rc); +} +/************************** sysfs end ************************/ + +static void intel_pstate_hwp_enable(struct cpudata *cpudata) +{ + wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); +} + +static int atom_get_min_pstate(void) +{ + u64 value; + + rdmsrl(ATOM_RATIOS, value); + return (value >> 8) & 0x7F; +} + +static int atom_get_max_pstate(void) +{ + u64 value; + + rdmsrl(ATOM_RATIOS, value); + return (value >> 16) & 0x7F; +} + +static int atom_get_turbo_pstate(void) +{ + u64 value; + + rdmsrl(ATOM_TURBO_RATIOS, value); + return value & 0x7F; +} + +static void atom_set_pstate(struct cpudata *cpudata, int pstate) +{ + u64 val; + int32_t vid_fp; + u32 vid; + + val = (u64)pstate << 8; + if (limits->no_turbo && !limits->turbo_disabled) + val |= (u64)1 << 32; + + vid_fp = cpudata->vid.min + mul_fp( + int_tofp(pstate - cpudata->pstate.min_pstate), + cpudata->vid.ratio); + + vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max); + vid = ceiling_fp(vid_fp); + + if (pstate > cpudata->pstate.max_pstate) + vid = cpudata->vid.turbo; + + val |= vid; + + wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); +} + +static int silvermont_get_scaling(void) +{ + u64 value; + int i; + /* Defined in Table 35-6 from SDM (Sept 2015) */ + static int silvermont_freq_table[] = { + 83300, 100000, 133300, 116700, 80000}; + + rdmsrl(MSR_FSB_FREQ, value); + i = value & 0x7; + WARN_ON(i > 4); + + return silvermont_freq_table[i]; +} + +static int airmont_get_scaling(void) +{ + u64 value; + int i; + /* Defined in Table 35-10 from SDM (Sept 2015) */ + static int airmont_freq_table[] = { + 83300, 100000, 133300, 116700, 80000, + 93300, 90000, 88900, 87500}; + + rdmsrl(MSR_FSB_FREQ, value); + i = value & 0xF; + WARN_ON(i > 8); + + return airmont_freq_table[i]; +} + +static void atom_get_vid(struct cpudata *cpudata) +{ + u64 value; + + rdmsrl(ATOM_VIDS, value); + cpudata->vid.min = int_tofp((value >> 8) & 0x7f); + cpudata->vid.max = int_tofp((value >> 16) & 0x7f); + cpudata->vid.ratio = div_fp( + cpudata->vid.max - cpudata->vid.min, + int_tofp(cpudata->pstate.max_pstate - + cpudata->pstate.min_pstate)); + + rdmsrl(ATOM_TURBO_VIDS, value); + cpudata->vid.turbo = value & 0x7f; +} + +static int core_get_min_pstate(void) +{ + u64 value; + + rdmsrl(MSR_PLATFORM_INFO, value); + return (value >> 40) & 0xFF; +} + +static int core_get_max_pstate_physical(void) +{ + u64 value; + + rdmsrl(MSR_PLATFORM_INFO, value); + return (value >> 8) & 0xFF; +} + +static int core_get_max_pstate(void) +{ + u64 tar; + u64 plat_info; + int max_pstate; + int err; + + rdmsrl(MSR_PLATFORM_INFO, plat_info); + max_pstate = (plat_info >> 8) & 0xFF; + + err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar); + if (!err) { + /* Do some sanity checking for safety */ + if (plat_info & 0x600000000) { + u64 tdp_ctrl; + u64 tdp_ratio; + int tdp_msr; + + err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl); + if (err) + goto skip_tar; + + tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x3); + err = rdmsrl_safe(tdp_msr, &tdp_ratio); + if (err) + goto skip_tar; + + /* For level 1 and 2, bits[23:16] contain the ratio */ + if (tdp_ctrl) + tdp_ratio >>= 16; + + tdp_ratio &= 0xff; /* ratios are only 8 bits long */ + if (tdp_ratio - 1 == tar) { + max_pstate = tar; + pr_debug("max_pstate=TAC %x\n", max_pstate); + } else { + goto skip_tar; + } + } + } + +skip_tar: + return max_pstate; +} + +static int core_get_turbo_pstate(void) +{ + u64 value; + int nont, ret; + + rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value); + nont = core_get_max_pstate(); + ret = (value) & 255; + if (ret <= nont) + ret = nont; + return ret; +} + +static inline int core_get_scaling(void) +{ + return 100000; +} + +static void core_set_pstate(struct cpudata *cpudata, int pstate) +{ + u64 val; + + val = (u64)pstate << 8; + if (limits->no_turbo && !limits->turbo_disabled) + val |= (u64)1 << 32; + + wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val); +} + +static int knl_get_turbo_pstate(void) +{ + u64 value; + int nont, ret; + + rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value); + nont = core_get_max_pstate(); + ret = (((value) >> 8) & 0xFF); + if (ret <= nont) + ret = nont; + return ret; +} + +static struct cpu_defaults core_params = { + .pid_policy = { + .sample_rate_ms = 10, + .deadband = 0, + .setpoint = 97, + .p_gain_pct = 20, + .d_gain_pct = 0, + .i_gain_pct = 0, + }, + .funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = core_get_turbo_pstate, + .get_scaling = core_get_scaling, + .set = core_set_pstate, + }, +}; + +static struct cpu_defaults silvermont_params = { + .pid_policy = { + .sample_rate_ms = 10, + .deadband = 0, + .setpoint = 60, + .p_gain_pct = 14, + .d_gain_pct = 0, + .i_gain_pct = 4, + }, + .funcs = { + .get_max = atom_get_max_pstate, + .get_max_physical = atom_get_max_pstate, + .get_min = atom_get_min_pstate, + .get_turbo = atom_get_turbo_pstate, + .set = atom_set_pstate, + .get_scaling = silvermont_get_scaling, + .get_vid = atom_get_vid, + }, +}; + +static struct cpu_defaults airmont_params = { + .pid_policy = { + .sample_rate_ms = 10, + .deadband = 0, + .setpoint = 60, + .p_gain_pct = 14, + .d_gain_pct = 0, + .i_gain_pct = 4, + }, + .funcs = { + .get_max = atom_get_max_pstate, + .get_max_physical = atom_get_max_pstate, + .get_min = atom_get_min_pstate, + .get_turbo = atom_get_turbo_pstate, + .set = atom_set_pstate, + .get_scaling = airmont_get_scaling, + .get_vid = atom_get_vid, + }, +}; + +static struct cpu_defaults knl_params = { + .pid_policy = { + .sample_rate_ms = 10, + .deadband = 0, + .setpoint = 97, + .p_gain_pct = 20, + .d_gain_pct = 0, + .i_gain_pct = 0, + }, + .funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = knl_get_turbo_pstate, + .get_scaling = core_get_scaling, + .set = core_set_pstate, + }, +}; + +static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) +{ + int max_perf = cpu->pstate.turbo_pstate; + int max_perf_adj; + int min_perf; + + if (limits->no_turbo || limits->turbo_disabled) + max_perf = cpu->pstate.max_pstate; + + /* + * performance can be limited by user through sysfs, by cpufreq + * policy, or by cpu specific default values determined through + * experimentation. + */ + max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf)); + *max = clamp_t(int, max_perf_adj, + cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); + + min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf)); + *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); +} + +static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force) +{ + int max_perf, min_perf; + + if (force) { + update_turbo_state(); + + intel_pstate_get_min_max(cpu, &min_perf, &max_perf); + + pstate = clamp_t(int, pstate, min_perf, max_perf); + + if (pstate == cpu->pstate.current_pstate) + return; + } + trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); + + cpu->pstate.current_pstate = pstate; + + pstate_funcs.set(cpu, pstate); +} + +static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) +{ + cpu->pstate.min_pstate = pstate_funcs.get_min(); + cpu->pstate.max_pstate = pstate_funcs.get_max(); + cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical(); + cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); + cpu->pstate.scaling = pstate_funcs.get_scaling(); + + if (pstate_funcs.get_vid) + pstate_funcs.get_vid(cpu); + intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false); +} + +static inline void intel_pstate_calc_busy(struct cpudata *cpu) +{ + struct sample *sample = &cpu->sample; + int64_t core_pct; + + core_pct = int_tofp(sample->aperf) * int_tofp(100); + core_pct = div64_u64(core_pct, int_tofp(sample->mperf)); + + sample->freq = fp_toint( + mul_fp(int_tofp( + cpu->pstate.max_pstate_physical * + cpu->pstate.scaling / 100), + core_pct)); + + sample->core_pct_busy = (int32_t)core_pct; +} + +static inline void intel_pstate_sample(struct cpudata *cpu) +{ + u64 aperf, mperf; + unsigned long flags; + u64 tsc; + + local_irq_save(flags); + rdmsrl(MSR_IA32_APERF, aperf); + rdmsrl(MSR_IA32_MPERF, mperf); + if (cpu->prev_mperf == mperf) { + local_irq_restore(flags); + return; + } + + tsc = rdtsc(); + local_irq_restore(flags); + + cpu->last_sample_time = cpu->sample.time; + cpu->sample.time = ktime_get(); + cpu->sample.aperf = aperf; + cpu->sample.mperf = mperf; + cpu->sample.tsc = tsc; + cpu->sample.aperf -= cpu->prev_aperf; + cpu->sample.mperf -= cpu->prev_mperf; + cpu->sample.tsc -= cpu->prev_tsc; + + intel_pstate_calc_busy(cpu); + + cpu->prev_aperf = aperf; + cpu->prev_mperf = mperf; + cpu->prev_tsc = tsc; +} + +static inline void intel_hwp_set_sample_time(struct cpudata *cpu) +{ + int delay; + + delay = msecs_to_jiffies(50); + mod_timer_pinned(&cpu->timer, jiffies + delay); +} + +static inline void intel_pstate_set_sample_time(struct cpudata *cpu) +{ + int delay; + + delay = msecs_to_jiffies(pid_params.sample_rate_ms); + mod_timer_pinned(&cpu->timer, jiffies + delay); +} + +static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) +{ + int32_t core_busy, max_pstate, current_pstate, sample_ratio; + s64 duration_us; + u32 sample_time; + + /* + * core_busy is the ratio of actual performance to max + * max_pstate is the max non turbo pstate available + * current_pstate was the pstate that was requested during + * the last sample period. + * + * We normalize core_busy, which was our actual percent + * performance to what we requested during the last sample + * period. The result will be a percentage of busy at a + * specified pstate. + */ + core_busy = cpu->sample.core_pct_busy; + max_pstate = int_tofp(cpu->pstate.max_pstate_physical); + current_pstate = int_tofp(cpu->pstate.current_pstate); + core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); + + /* + * Since we have a deferred timer, it will not fire unless + * we are in C0. So, determine if the actual elapsed time + * is significantly greater (3x) than our sample interval. If it + * is, then we were idle for a long enough period of time + * to adjust our busyness. + */ + sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC; + duration_us = ktime_us_delta(cpu->sample.time, + cpu->last_sample_time); + if (duration_us > sample_time * 3) { + sample_ratio = div_fp(int_tofp(sample_time), + int_tofp(duration_us)); + core_busy = mul_fp(core_busy, sample_ratio); + } + + return core_busy; +} + +static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) +{ + int32_t busy_scaled; + struct _pid *pid; + signed int ctl; + int from; + struct sample *sample; + + from = cpu->pstate.current_pstate; + + pid = &cpu->pid; + busy_scaled = intel_pstate_get_scaled_busy(cpu); + + ctl = pid_calc(pid, busy_scaled); + + /* Negative values of ctl increase the pstate and vice versa */ + intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl, true); + + sample = &cpu->sample; + trace_pstate_sample(fp_toint(sample->core_pct_busy), + fp_toint(busy_scaled), + from, + cpu->pstate.current_pstate, + sample->mperf, + sample->aperf, + sample->tsc, + sample->freq); +} + +static void intel_hwp_timer_func(unsigned long __data) +{ + struct cpudata *cpu = (struct cpudata *) __data; + + intel_pstate_sample(cpu); + intel_hwp_set_sample_time(cpu); +} + +static void intel_pstate_timer_func(unsigned long __data) +{ + struct cpudata *cpu = (struct cpudata *) __data; + + intel_pstate_sample(cpu); + + intel_pstate_adjust_busy_pstate(cpu); + + intel_pstate_set_sample_time(cpu); +} + +#define ICPU(model, policy) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\ + (unsigned long)&policy } + +static const struct x86_cpu_id intel_pstate_cpu_ids[] = { + ICPU(0x2a, core_params), + ICPU(0x2d, core_params), + ICPU(0x37, silvermont_params), + ICPU(0x3a, core_params), + ICPU(0x3c, core_params), + ICPU(0x3d, core_params), + ICPU(0x3e, core_params), + ICPU(0x3f, core_params), + ICPU(0x45, core_params), + ICPU(0x46, core_params), + ICPU(0x47, core_params), + ICPU(0x4c, airmont_params), + ICPU(0x4e, core_params), + ICPU(0x4f, core_params), + ICPU(0x5e, core_params), + ICPU(0x56, core_params), + ICPU(0x57, knl_params), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); + +static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = { + ICPU(0x56, core_params), + {} +}; + +static int intel_pstate_init_cpu(unsigned int cpunum) +{ + struct cpudata *cpu; + + if (!all_cpu_data[cpunum]) + all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), + GFP_KERNEL); + if (!all_cpu_data[cpunum]) + return -ENOMEM; + + cpu = all_cpu_data[cpunum]; + + cpu->cpu = cpunum; + + if (hwp_active) + intel_pstate_hwp_enable(cpu); + + intel_pstate_get_cpu_pstates(cpu); + + init_timer_deferrable(&cpu->timer); + cpu->timer.data = (unsigned long)cpu; + cpu->timer.expires = jiffies + HZ/100; + + if (!hwp_active) + cpu->timer.function = intel_pstate_timer_func; + else + cpu->timer.function = intel_hwp_timer_func; + + intel_pstate_busy_pid_reset(cpu); + intel_pstate_sample(cpu); + + add_timer_on(&cpu->timer, cpunum); + + pr_debug("intel_pstate: controlling: cpu %d\n", cpunum); + + return 0; +} + +static unsigned int intel_pstate_get(unsigned int cpu_num) +{ + struct sample *sample; + struct cpudata *cpu; + + cpu = all_cpu_data[cpu_num]; + if (!cpu) + return 0; + sample = &cpu->sample; + return sample->freq; +} + +static int intel_pstate_set_policy(struct cpufreq_policy *policy) +{ + if (!policy->cpuinfo.max_freq) + return -ENODEV; + + if (policy->policy == CPUFREQ_POLICY_PERFORMANCE && + policy->max >= policy->cpuinfo.max_freq) { + pr_debug("intel_pstate: set performance\n"); + limits = &performance_limits; + if (hwp_active) + intel_pstate_hwp_set(); + return 0; + } + + pr_debug("intel_pstate: set powersave\n"); + limits = &powersave_limits; + limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq; + limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100); + limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100, + policy->cpuinfo.max_freq); + limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0 , 100); + + /* Normalize user input to [min_policy_pct, max_policy_pct] */ + limits->min_perf_pct = max(limits->min_policy_pct, + limits->min_sysfs_pct); + limits->min_perf_pct = min(limits->max_policy_pct, + limits->min_perf_pct); + limits->max_perf_pct = min(limits->max_policy_pct, + limits->max_sysfs_pct); + limits->max_perf_pct = max(limits->min_policy_pct, + limits->max_perf_pct); + limits->max_perf = round_up(limits->max_perf, FRAC_BITS); + + /* Make sure min_perf_pct <= max_perf_pct */ + limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); + + limits->min_perf = div_fp(int_tofp(limits->min_perf_pct), + int_tofp(100)); + limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), + int_tofp(100)); + + if (hwp_active) + intel_pstate_hwp_set(); + + return 0; +} + +static int intel_pstate_verify_policy(struct cpufreq_policy *policy) +{ + cpufreq_verify_within_cpu_limits(policy); + + if (policy->policy != CPUFREQ_POLICY_POWERSAVE && + policy->policy != CPUFREQ_POLICY_PERFORMANCE) + return -EINVAL; + + return 0; +} + +static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) +{ + int cpu_num = policy->cpu; + struct cpudata *cpu = all_cpu_data[cpu_num]; + + pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); + + del_timer_sync(&all_cpu_data[cpu_num]->timer); + if (hwp_active) + return; + + intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false); +} + +static int intel_pstate_cpu_init(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + int rc; + + rc = intel_pstate_init_cpu(policy->cpu); + if (rc) + return rc; + + cpu = all_cpu_data[policy->cpu]; + + if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100) + policy->policy = CPUFREQ_POLICY_PERFORMANCE; + else + policy->policy = CPUFREQ_POLICY_POWERSAVE; + + policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; + policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; + + /* cpuinfo and default policy values */ + policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; + policy->cpuinfo.max_freq = + cpu->pstate.turbo_pstate * cpu->pstate.scaling; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + cpumask_set_cpu(policy->cpu, policy->cpus); + + return 0; +} + +static struct cpufreq_driver intel_pstate_driver = { + .flags = CPUFREQ_CONST_LOOPS, + .verify = intel_pstate_verify_policy, + .setpolicy = intel_pstate_set_policy, + .get = intel_pstate_get, + .init = intel_pstate_cpu_init, + .stop_cpu = intel_pstate_stop_cpu, + .name = "intel_pstate", +}; + +static int __initdata no_load; +static int __initdata no_hwp; +static int __initdata hwp_only; +static unsigned int force_load; + +static int intel_pstate_msrs_not_valid(void) +{ + if (!pstate_funcs.get_max() || + !pstate_funcs.get_min() || + !pstate_funcs.get_turbo()) + return -ENODEV; + + return 0; +} + +static void copy_pid_params(struct pstate_adjust_policy *policy) +{ + pid_params.sample_rate_ms = policy->sample_rate_ms; + pid_params.p_gain_pct = policy->p_gain_pct; + pid_params.i_gain_pct = policy->i_gain_pct; + pid_params.d_gain_pct = policy->d_gain_pct; + pid_params.deadband = policy->deadband; + pid_params.setpoint = policy->setpoint; +} + +static void copy_cpu_funcs(struct pstate_funcs *funcs) +{ + pstate_funcs.get_max = funcs->get_max; + pstate_funcs.get_max_physical = funcs->get_max_physical; + pstate_funcs.get_min = funcs->get_min; + pstate_funcs.get_turbo = funcs->get_turbo; + pstate_funcs.get_scaling = funcs->get_scaling; + pstate_funcs.set = funcs->set; + pstate_funcs.get_vid = funcs->get_vid; +} + +#if IS_ENABLED(CONFIG_ACPI) +#include + +static bool intel_pstate_no_acpi_pss(void) +{ + int i; + + for_each_possible_cpu(i) { + acpi_status status; + union acpi_object *pss; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_processor *pr = per_cpu(processors, i); + + if (!pr) + continue; + + status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer); + if (ACPI_FAILURE(status)) + continue; + + pss = buffer.pointer; + if (pss && pss->type == ACPI_TYPE_PACKAGE) { + kfree(pss); + return false; + } + + kfree(pss); + } + + return true; +} + +static bool intel_pstate_has_acpi_ppc(void) +{ + int i; + + for_each_possible_cpu(i) { + struct acpi_processor *pr = per_cpu(processors, i); + + if (!pr) + continue; + if (acpi_has_method(pr->handle, "_PPC")) + return true; + } + return false; +} + +enum { + PSS, + PPC, +}; + +struct hw_vendor_info { + u16 valid; + char oem_id[ACPI_OEM_ID_SIZE]; + char oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; + int oem_pwr_table; +}; + +/* Hardware vendor-specific info that has its own power management modes */ +static struct hw_vendor_info vendor_info[] = { + {1, "HP ", "ProLiant", PSS}, + {1, "ORACLE", "X4-2 ", PPC}, + {1, "ORACLE", "X4-2L ", PPC}, + {1, "ORACLE", "X4-2B ", PPC}, + {1, "ORACLE", "X3-2 ", PPC}, + {1, "ORACLE", "X3-2L ", PPC}, + {1, "ORACLE", "X3-2B ", PPC}, + {1, "ORACLE", "X4470M2 ", PPC}, + {1, "ORACLE", "X4270M3 ", PPC}, + {1, "ORACLE", "X4270M2 ", PPC}, + {1, "ORACLE", "X4170M2 ", PPC}, + {1, "ORACLE", "X4170 M3", PPC}, + {1, "ORACLE", "X4275 M3", PPC}, + {1, "ORACLE", "X6-2 ", PPC}, + {1, "ORACLE", "Sudbury ", PPC}, + {0, "", ""}, +}; + +static bool intel_pstate_platform_pwr_mgmt_exists(void) +{ + struct acpi_table_header hdr; + struct hw_vendor_info *v_info; + const struct x86_cpu_id *id; + u64 misc_pwr; + + id = x86_match_cpu(intel_pstate_cpu_oob_ids); + if (id) { + rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr); + if ( misc_pwr & (1 << 8)) + return true; + } + + if (acpi_disabled || + ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr))) + return false; + + for (v_info = vendor_info; v_info->valid; v_info++) { + if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) && + !strncmp(hdr.oem_table_id, v_info->oem_table_id, + ACPI_OEM_TABLE_ID_SIZE)) + switch (v_info->oem_pwr_table) { + case PSS: + return intel_pstate_no_acpi_pss(); + case PPC: + return intel_pstate_has_acpi_ppc() && + (!force_load); + } + } + + return false; +} +#else /* CONFIG_ACPI not enabled */ +static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; } +static inline bool intel_pstate_has_acpi_ppc(void) { return false; } +#endif /* CONFIG_ACPI */ + +static const struct x86_cpu_id hwp_support_ids[] __initconst = { + { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP }, + {} +}; + +static int __init intel_pstate_init(void) +{ + int cpu, rc = 0; + const struct x86_cpu_id *id; + struct cpu_defaults *cpu_def; + + if (no_load) + return -ENODEV; + + if (x86_match_cpu(hwp_support_ids) && !no_hwp) { + copy_cpu_funcs(&core_params.funcs); + hwp_active++; + goto hwp_cpu_matched; + } + + id = x86_match_cpu(intel_pstate_cpu_ids); + if (!id) + return -ENODEV; + + cpu_def = (struct cpu_defaults *)id->driver_data; + + copy_pid_params(&cpu_def->pid_policy); + copy_cpu_funcs(&cpu_def->funcs); + + if (intel_pstate_msrs_not_valid()) + return -ENODEV; + +hwp_cpu_matched: + /* + * The Intel pstate driver will be ignored if the platform + * firmware has its own power management modes. + */ + if (intel_pstate_platform_pwr_mgmt_exists()) + return -ENODEV; + + pr_info("Intel P-state driver initializing.\n"); + + all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); + if (!all_cpu_data) + return -ENOMEM; + + if (!hwp_active && hwp_only) + goto out; + + rc = cpufreq_register_driver(&intel_pstate_driver); + if (rc) + goto out; + + intel_pstate_debug_expose_params(); + intel_pstate_sysfs_expose_params(); + + if (hwp_active) + pr_info("intel_pstate: HWP enabled\n"); + + return rc; +out: + get_online_cpus(); + for_each_online_cpu(cpu) { + if (all_cpu_data[cpu]) { + del_timer_sync(&all_cpu_data[cpu]->timer); + kfree(all_cpu_data[cpu]); + } + } + + put_online_cpus(); + vfree(all_cpu_data); + return -ENODEV; +} +device_initcall(intel_pstate_init); + +static int __init intel_pstate_setup(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "disable")) + no_load = 1; + if (!strcmp(str, "no_hwp")) { + pr_info("intel_pstate: HWP disabled\n"); + no_hwp = 1; + } + if (!strcmp(str, "force")) + force_load = 1; + if (!strcmp(str, "hwp_only")) + hwp_only = 1; + return 0; +} +early_param("intel_pstate", intel_pstate_setup); + +MODULE_AUTHOR("Dirk Brandewie "); +MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors"); +MODULE_LICENSE("GPL"); diff --git a/v1000-4.4.180/cpufreq_conservative.ko b/v1000-4.4.180/cpufreq_conservative.ko new file mode 100644 index 000000000..560f1cf53 Binary files /dev/null and b/v1000-4.4.180/cpufreq_conservative.ko differ diff --git a/v1000-4.4.180/cpufreq_ondemand.ko b/v1000-4.4.180/cpufreq_ondemand.ko new file mode 100644 index 000000000..522a69249 Binary files /dev/null and b/v1000-4.4.180/cpufreq_ondemand.ko differ diff --git a/v1000-4.4.180/cpufreq_userspace.ko b/v1000-4.4.180/cpufreq_userspace.ko new file mode 100644 index 000000000..2f15c111b Binary files /dev/null and b/v1000-4.4.180/cpufreq_userspace.ko differ