Diffstat (limited to 'extras/recipes-kernel/linux/linux-omap-2.6.39/pm/linux-omap-2.6.39-ti-pm-wip-cpufreq-hotplug/0002-cpufreq-introduce-hotplug-governor.patch')
-rw-r--r-- | extras/recipes-kernel/linux/linux-omap-2.6.39/pm/linux-omap-2.6.39-ti-pm-wip-cpufreq-hotplug/0002-cpufreq-introduce-hotplug-governor.patch | 879 |
1 file changed, 879 insertions, 0 deletions
diff --git a/extras/recipes-kernel/linux/linux-omap-2.6.39/pm/linux-omap-2.6.39-ti-pm-wip-cpufreq-hotplug/0002-cpufreq-introduce-hotplug-governor.patch b/extras/recipes-kernel/linux/linux-omap-2.6.39/pm/linux-omap-2.6.39-ti-pm-wip-cpufreq-hotplug/0002-cpufreq-introduce-hotplug-governor.patch
new file mode 100644
index 00000000..731906cc
--- /dev/null
+++ b/extras/recipes-kernel/linux/linux-omap-2.6.39/pm/linux-omap-2.6.39-ti-pm-wip-cpufreq-hotplug/0002-cpufreq-introduce-hotplug-governor.patch
@@ -0,0 +1,879 @@
+From e4c777d8314d7925e4895f00b3a7ebd64a4d830b Mon Sep 17 00:00:00 2001
+From: Mike Turquette <mturquette@ti.com>
+Date: Tue, 17 May 2011 09:43:09 -0500
+Subject: [PATCH 2/2] cpufreq: introduce hotplug governor
+
+The "hotplug" governor scales CPU frequency based on load, similar to
+"ondemand". It scales up to the highest frequency when "up_threshold"
+is crossed and scales down one frequency at a time when "down_threshold"
+is crossed. Unlike those governors, target frequencies are determined
+by directly accessing the CPUfreq frequency table, instead of taking
+some percentage of maximum available frequency.
+
+The key difference in the "hotplug" governor is that it will disable
+auxiliary CPUs when the system is very idle, and enable them again once
+the system becomes busy. This is achieved by averaging load over
+multiple sampling periods; if CPUs were onlined or offlined based on a
+single sampling period then thrashing would occur.
+
+Sysfs entries exist for "hotplug_in_sampling_periods" and for
+"hotplug_out_sampling_periods", which determine how many consecutive
+periods get averaged to decide whether auxiliary CPUs should be onlined
+or offlined. Defaults are 5 periods and 20 periods respectively.
+Otherwise the standard sysfs entries you might find for the "ondemand"
+and "conservative" governors are there.
+
+To use this governor it is assumed that your CPUfreq driver has
+populated the CPUfreq table, CONFIG_NO_HZ is enabled and
+CONFIG_HOTPLUG_CPU is enabled.
+
+Changes in V2:
+    Corrected default sampling periods
+    Optimized load history array resizing
+    Maintain load history when resizing array
+    Add locking to dbs_check_cpu
+    Switch from enable_nonboot_cpus to cpu_up
+    Switch from disable_nonboot_cpus to cpu_down
+    Fix some printks
+    Coding style around for-loops
+
+Signed-off-by: Mike Turquette <mturquette@ti.com>
+Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
+Signed-off-by: Koen Kooi <koen@dominion.thruhere.net>
+---
+ Documentation/cpu-freq/governors.txt |   28 ++
+ drivers/cpufreq/Kconfig              |   33 ++
+ drivers/cpufreq/Makefile             |    1 +
+ drivers/cpufreq/cpufreq_hotplug.c    |  705 ++++++++++++++++++++++++++++++++++
+ include/linux/cpufreq.h              |    3 +
+ 5 files changed, 770 insertions(+), 0 deletions(-)
+ create mode 100644 drivers/cpufreq/cpufreq_hotplug.c
+
+diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
+index e74d0a2..c2e3d3d 100644
+--- a/Documentation/cpu-freq/governors.txt
++++ b/Documentation/cpu-freq/governors.txt
+@@ -193,6 +193,34 @@ governor but for the opposite direction. For example when set to its
+ default value of '20' it means that if the CPU usage needs to be below
+ 20% between samples to have the frequency decreased.
+ 
++
++2.6 Hotplug
++-----------
++
++The CPUfreq governor "hotplug" operates similarly to "ondemand" and
++"conservative". Its decisions are based primarily on CPU load. Like
++"ondemand", the "hotplug" governor will ramp up to the highest frequency
++once the run-time tunable "up_threshold" parameter is crossed. Like
++"conservative", the "hotplug" governor exports a "down_threshold"
++parameter that is also tunable at run-time. When the "down_threshold"
++is crossed the CPU transitions to the next lowest frequency in the
++CPUfreq frequency table instead of decrementing the frequency based on a
++percentage of maximum load.
++
++The main reason the "hotplug" governor exists is for architectures requiring
++that only the master CPU be online in order to hit low-power states
++(C-states). OMAP4 is one such example. The "hotplug" governor
++is also helpful in reducing thermal output in devices with tight thermal
++constraints.
++
++Auxiliary CPUs are onlined/offlined based on CPU load, but the decision
++to do so is made after averaging several sampling windows. This is to
++reduce CPU hotplug "thrashing", which can be caused by normal system
++entropy and leads to lots of spurious plug-in and plug-out transitions.
++The number of sampling periods averaged together is tunable via the
++"hotplug_in_sampling_periods" and "hotplug_out_sampling_periods"
++run-time parameters.
++
+ 3. The Governor Interface in the CPUfreq Core
+ =============================================
+ 
+diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
+index ca8ee80..c716a0e 100644
+--- a/drivers/cpufreq/Kconfig
++++ b/drivers/cpufreq/Kconfig
+@@ -110,6 +110,19 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
+ 	  Be aware that not all cpufreq drivers support the conservative
+ 	  governor. If unsure have a look at the help section of the
+ 	  driver. Fallback governor will be the performance governor.
++
++config CPU_FREQ_DEFAULT_GOV_HOTPLUG
++	bool "hotplug"
++	select CPU_FREQ_GOV_HOTPLUG
++	select CPU_FREQ_GOV_PERFORMANCE
++	help
++	  Use the CPUFreq governor 'hotplug' as default. This allows you
++	  to get a full dynamic frequency capable system with CPU
++	  hotplug support by simply loading your cpufreq low-level
++	  hardware driver. Be aware that not all cpufreq drivers
++	  support the hotplug governor. If unsure have a look at
++	  the help section of the driver. Fallback governor will be the
++	  performance governor.
+ endchoice
+ 
+ config CPU_FREQ_GOV_PERFORMANCE
+@@ -190,4 +203,24 @@ config CPU_FREQ_GOV_CONSERVATIVE
+ 
+ 	  If in doubt, say N.
+ 
++config CPU_FREQ_GOV_HOTPLUG
++	tristate "'hotplug' cpufreq governor"
++	depends on CPU_FREQ && NO_HZ && HOTPLUG_CPU
++	help
++	  'hotplug' - this driver mimics the frequency scaling behavior
++	  in 'ondemand', but with several key differences. First is
++	  that frequency transitions use the CPUFreq table directly,
++	  instead of incrementing by a percentage of the maximum
++	  available frequency. Second, 'hotplug' will offline auxiliary
++	  CPUs when the system is idle, and online those CPUs once the
++	  system becomes busy again. This last feature is needed for
++	  architectures which transition to low power states when only
++	  the "master" CPU is online, or for thermally constrained
++	  devices.
++
++	  If you don't have one of these architectures or devices, use
++	  'ondemand' instead.
++
++	  If in doubt, say N.
++
+ endif	# CPU_FREQ
+diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
+index 71fc3b4..05d564c 100644
+--- a/drivers/cpufreq/Makefile
++++ b/drivers/cpufreq/Makefile
+@@ -9,6 +9,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE)	+= cpufreq_powersave.o
+ obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE)	+= cpufreq_userspace.o
+ obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND)	+= cpufreq_ondemand.o
+ obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE)	+= cpufreq_conservative.o
++obj-$(CONFIG_CPU_FREQ_GOV_HOTPLUG)	+= cpufreq_hotplug.o
+ 
+ # CPUfreq cross-arch helpers
+ obj-$(CONFIG_CPU_FREQ_TABLE)		+= freq_table.o
+diff --git a/drivers/cpufreq/cpufreq_hotplug.c b/drivers/cpufreq/cpufreq_hotplug.c
+new file mode 100644
+index 0000000..85aa6d2
+--- /dev/null
++++ b/drivers/cpufreq/cpufreq_hotplug.c
+@@ -0,0 +1,705 @@
++/*
++ * CPUFreq hotplug governor
++ *
++ * Copyright (C) 2010 Texas Instruments, Inc.
++ *   Mike Turquette <mturquette@ti.com>
++ *   Santosh Shilimkar <santosh.shilimkar@ti.com>
++ *
++ * Based on ondemand governor
++ * Copyright (C)  2001 Russell King
++ *           (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>,
++ *                     Jun Nakajima <jun.nakajima@intel.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/cpufreq.h>
++#include <linux/cpu.h>
++#include <linux/jiffies.h>
++#include <linux/kernel_stat.h>
++#include <linux/mutex.h>
++#include <linux/hrtimer.h>
++#include <linux/tick.h>
++#include <linux/ktime.h>
++#include <linux/sched.h>
++#include <linux/err.h>
++#include <linux/slab.h>
++
++/* greater than 80% avg load across online CPUs increases frequency */
++#define DEFAULT_UP_FREQ_MIN_LOAD		(80)
++
++/* less than 20% avg load across online CPUs decreases frequency */
++#define DEFAULT_DOWN_FREQ_MAX_LOAD		(20)
++
++/* default sampling period (uSec) is bogus; 10x ondemand's default for x86 */
++#define DEFAULT_SAMPLING_PERIOD			(100000)
++
++/* default number of sampling periods to average before hotplug-in decision */
++#define DEFAULT_HOTPLUG_IN_SAMPLING_PERIODS	(5)
++
++/* default number of sampling periods to average before hotplug-out decision */
++#define DEFAULT_HOTPLUG_OUT_SAMPLING_PERIODS	(20)
++
++static void do_dbs_timer(struct work_struct *work);
++static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
++				unsigned int event);
++
++#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_HOTPLUG
++static
++#endif
++struct cpufreq_governor cpufreq_gov_hotplug = {
++	.name			= "hotplug",
++	.governor		= cpufreq_governor_dbs,
++	.owner			= THIS_MODULE,
++};
++
++struct cpu_dbs_info_s {
++	cputime64_t prev_cpu_idle;
++	cputime64_t prev_cpu_wall;
++	cputime64_t prev_cpu_nice;
++	struct cpufreq_policy *cur_policy;
++	struct delayed_work work;
++	struct cpufreq_frequency_table *freq_table;
++	int cpu;
++	/*
++	 * percpu mutex that serializes governor limit change with
++	 * do_dbs_timer invocation. We do not want do_dbs_timer to run
++	 * when user is changing the governor or limits.
++	 */
++	struct mutex timer_mutex;
++};
++static DEFINE_PER_CPU(struct cpu_dbs_info_s, hp_cpu_dbs_info);
++
++static unsigned int dbs_enable;	/* number of CPUs using this policy */
++
++/*
++ * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on
++ * different CPUs. It protects dbs_enable in governor start/stop.
++ */
++static DEFINE_MUTEX(dbs_mutex);
++
++static struct workqueue_struct *khotplug_wq;
++
++static struct dbs_tuners {
++	unsigned int sampling_rate;
++	unsigned int up_threshold;
++	unsigned int down_threshold;
++	unsigned int hotplug_in_sampling_periods;
++	unsigned int hotplug_out_sampling_periods;
++	unsigned int hotplug_load_index;
++	unsigned int *hotplug_load_history;
++	unsigned int ignore_nice;
++	unsigned int io_is_busy;
++} dbs_tuners_ins = {
++	.sampling_rate =		DEFAULT_SAMPLING_PERIOD,
++	.up_threshold =			DEFAULT_UP_FREQ_MIN_LOAD,
++	.down_threshold =		DEFAULT_DOWN_FREQ_MAX_LOAD,
++	.hotplug_in_sampling_periods =	DEFAULT_HOTPLUG_IN_SAMPLING_PERIODS,
++	.hotplug_out_sampling_periods =	DEFAULT_HOTPLUG_OUT_SAMPLING_PERIODS,
++	.hotplug_load_index =		0,
++	.ignore_nice =			0,
++	.io_is_busy =			0,
++};
++
++/*
++ * A corner case exists when switching io_is_busy at run-time: comparing idle
++ * times from a non-io_is_busy period to an io_is_busy period (or vice-versa)
++ * will misrepresent the actual change in system idleness. We ignore this
++ * corner case: enabling io_is_busy might cause freq increase and disabling
++ * might cause freq decrease, which probably matches the original intent.
++ */
++static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
++{
++	u64 idle_time;
++	u64 iowait_time;
++
++	/* cpufreq-hotplug always assumes CONFIG_NO_HZ */
++	idle_time = get_cpu_idle_time_us(cpu, wall);
++
++	/* add time spent doing I/O to idle time */
++	if (dbs_tuners_ins.io_is_busy) {
++		iowait_time = get_cpu_iowait_time_us(cpu, wall);
++		/* cpufreq-hotplug always assumes CONFIG_NO_HZ */
++		if (iowait_time != -1ULL && idle_time >= iowait_time)
++			idle_time -= iowait_time;
++	}
++
++	return idle_time;
++}
++
++/************************** sysfs interface ************************/
++
++/* XXX look at global sysfs macros in cpufreq.h, can those be used here? */
++
++/* cpufreq_hotplug Governor Tunables */
++#define show_one(file_name, object)					\
++static ssize_t show_##file_name						\
++(struct kobject *kobj, struct attribute *attr, char *buf)		\
++{									\
++	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
++}
++show_one(sampling_rate, sampling_rate);
++show_one(up_threshold, up_threshold);
++show_one(down_threshold, down_threshold);
++show_one(hotplug_in_sampling_periods, hotplug_in_sampling_periods);
++show_one(hotplug_out_sampling_periods, hotplug_out_sampling_periods);
++show_one(ignore_nice_load, ignore_nice);
++show_one(io_is_busy, io_is_busy);
++
++static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
++				   const char *buf, size_t count)
++{
++	unsigned int input;
++	int ret;
++	ret = sscanf(buf, "%u", &input);
++	if (ret != 1)
++		return -EINVAL;
++
++	mutex_lock(&dbs_mutex);
++	dbs_tuners_ins.sampling_rate = input;
++	mutex_unlock(&dbs_mutex);
++
++	return count;
++}
++
++static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
++				  const char *buf, size_t count)
++{
++	unsigned int input;
++	int ret;
++	ret = sscanf(buf, "%u", &input);
++
++	if (ret != 1 || input <= dbs_tuners_ins.down_threshold) {
++		return -EINVAL;
++	}
++
++	mutex_lock(&dbs_mutex);
++	dbs_tuners_ins.up_threshold = input;
++	mutex_unlock(&dbs_mutex);
++
++	return count;
++}
++
++static ssize_t store_down_threshold(struct kobject *a, struct attribute *b,
++				    const char *buf, size_t count)
++{
++	unsigned int input;
++	int ret;
++	ret = sscanf(buf, "%u", &input);
++
++	if (ret != 1 || input >= dbs_tuners_ins.up_threshold) {
++		return -EINVAL;
++	}
++
++	mutex_lock(&dbs_mutex);
++	dbs_tuners_ins.down_threshold = input;
++	mutex_unlock(&dbs_mutex);
++
++	return count;
++}
++
++static ssize_t store_hotplug_in_sampling_periods(struct kobject *a,
++		struct attribute *b, const char *buf, size_t count)
++{
++	unsigned int input;
++	unsigned int *temp;
++	unsigned int max_windows;
++	int ret;
++	ret = sscanf(buf, "%u", &input);
++
++	if (ret != 1)
++		return -EINVAL;
++
++	/* already using this value, bail out */
++	if (input == dbs_tuners_ins.hotplug_in_sampling_periods)
++		return count;
++
++	mutex_lock(&dbs_mutex);
++	ret = count;
++	max_windows = max(dbs_tuners_ins.hotplug_in_sampling_periods,
++			dbs_tuners_ins.hotplug_out_sampling_periods);
++
++	/* no need to resize array */
++	if (input <= max_windows) {
++		dbs_tuners_ins.hotplug_in_sampling_periods = input;
++		goto out;
++	}
++
++	/* resize array */
++	temp = kmalloc((sizeof(unsigned int) * input), GFP_KERNEL);
++
++	if (!temp || IS_ERR(temp)) {
++		ret = -ENOMEM;
++		goto out;
++	}
++
++	memcpy(temp, dbs_tuners_ins.hotplug_load_history,
++			(max_windows * sizeof(unsigned int)));
++	kfree(dbs_tuners_ins.hotplug_load_history);
++
++	/* replace old buffer, old number of sampling periods & old index */
++	dbs_tuners_ins.hotplug_load_history = temp;
++	dbs_tuners_ins.hotplug_in_sampling_periods = input;
++	dbs_tuners_ins.hotplug_load_index = max_windows;
++out:
++	mutex_unlock(&dbs_mutex);
++
++	return ret;
++}
++
++static ssize_t store_hotplug_out_sampling_periods(struct kobject *a,
++		struct attribute *b, const char *buf, size_t count)
++{
++	unsigned int input;
++	unsigned int *temp;
++	unsigned int max_windows;
++	int ret;
++	ret = sscanf(buf, "%u", &input);
++
++	if (ret != 1)
++		return -EINVAL;
++
++	/* already using this value, bail out */
++	if (input == dbs_tuners_ins.hotplug_out_sampling_periods)
++		return count;
++
++	mutex_lock(&dbs_mutex);
++	ret = count;
++	max_windows = max(dbs_tuners_ins.hotplug_in_sampling_periods,
++			dbs_tuners_ins.hotplug_out_sampling_periods);
++
++	/* no need to resize array */
++	if (input <= max_windows) {
++		dbs_tuners_ins.hotplug_out_sampling_periods = input;
++		goto out;
++	}
++
++	/* resize array */
++	temp = kmalloc((sizeof(unsigned int) * input), GFP_KERNEL);
++
++	if (!temp || IS_ERR(temp)) {
++		ret = -ENOMEM;
++		goto out;
++	}
++
++	memcpy(temp, dbs_tuners_ins.hotplug_load_history,
++			(max_windows * sizeof(unsigned int)));
++	kfree(dbs_tuners_ins.hotplug_load_history);
++
++	/* replace old buffer, old number of sampling periods & old index */
++	dbs_tuners_ins.hotplug_load_history = temp;
++	dbs_tuners_ins.hotplug_out_sampling_periods = input;
++	dbs_tuners_ins.hotplug_load_index = max_windows;
++out:
++	mutex_unlock(&dbs_mutex);
++
++	return ret;
++}
++
++static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
++				      const char *buf, size_t count)
++{
++	unsigned int input;
++	int ret;
++
++	unsigned int j;
++
++	ret = sscanf(buf, "%u", &input);
++	if (ret != 1)
++		return -EINVAL;
++
++	if (input > 1)
++		input = 1;
++
++	mutex_lock(&dbs_mutex);
++	if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
++		mutex_unlock(&dbs_mutex);
++		return count;
++	}
++	dbs_tuners_ins.ignore_nice = input;
++
++	/* we need to re-evaluate prev_cpu_idle */
++	for_each_online_cpu(j) {
++		struct cpu_dbs_info_s *dbs_info;
++		dbs_info = &per_cpu(hp_cpu_dbs_info, j);
++		dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
++				&dbs_info->prev_cpu_wall);
++		if (dbs_tuners_ins.ignore_nice)
++			dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
++
++	}
++	mutex_unlock(&dbs_mutex);
++
++	return count;
++}
++
++static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b,
++				const char *buf, size_t count)
++{
++	unsigned int input;
++	int ret;
++
++	ret = sscanf(buf, "%u", &input);
++	if (ret != 1)
++		return -EINVAL;
++
++	mutex_lock(&dbs_mutex);
++	dbs_tuners_ins.io_is_busy = !!input;
++	mutex_unlock(&dbs_mutex);
++
++	return count;
++}
++
++define_one_global_rw(sampling_rate);
++define_one_global_rw(up_threshold);
++define_one_global_rw(down_threshold);
++define_one_global_rw(hotplug_in_sampling_periods);
++define_one_global_rw(hotplug_out_sampling_periods);
++define_one_global_rw(ignore_nice_load);
++define_one_global_rw(io_is_busy);
++
++static struct attribute *dbs_attributes[] = {
++	&sampling_rate.attr,
++	&up_threshold.attr,
++	&down_threshold.attr,
++	&hotplug_in_sampling_periods.attr,
++	&hotplug_out_sampling_periods.attr,
++	&ignore_nice_load.attr,
++	&io_is_busy.attr,
++	NULL
++};
++
++static struct attribute_group dbs_attr_group = {
++	.attrs = dbs_attributes,
++	.name = "hotplug",
++};
++
++/************************** sysfs end ************************/
++
++static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
++{
++	/* combined load of all enabled CPUs */
++	unsigned int total_load = 0;
++	/* single largest CPU load */
++	unsigned int max_load = 0;
++	/* average load across all enabled CPUs */
++	unsigned int avg_load = 0;
++	/* average load across multiple sampling periods for hotplug events */
++	unsigned int hotplug_in_avg_load = 0;
++	unsigned int hotplug_out_avg_load = 0;
++	/* number of sampling periods averaged for hotplug decisions */
++	unsigned int periods;
++
++	struct cpufreq_policy *policy;
++	unsigned int index = 0;
++	unsigned int i, j;
++
++	policy = this_dbs_info->cur_policy;
++
++	/*
++	 * cpu load accounting
++	 * get highest load, total load and average load across all CPUs
++	 */
++	for_each_cpu(j, policy->cpus) {
++		unsigned int load;
++		unsigned int idle_time, wall_time;
++		cputime64_t cur_wall_time, cur_idle_time;
++		struct cpu_dbs_info_s *j_dbs_info;
++
++		j_dbs_info = &per_cpu(hp_cpu_dbs_info, j);
++
++		/* update both cur_idle_time and cur_wall_time */
++		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
++
++		/* how much wall time has passed since last iteration? */
++		wall_time = (unsigned int) cputime64_sub(cur_wall_time,
++				j_dbs_info->prev_cpu_wall);
++		j_dbs_info->prev_cpu_wall = cur_wall_time;
++
++		/* how much idle time has passed since last iteration? */
++		idle_time = (unsigned int) cputime64_sub(cur_idle_time,
++				j_dbs_info->prev_cpu_idle);
++		j_dbs_info->prev_cpu_idle = cur_idle_time;
++
++		if (unlikely(!wall_time || wall_time < idle_time))
++			continue;
++
++		/* load is the percentage of time not spent in idle */
++		load = 100 * (wall_time - idle_time) / wall_time;
++
++		/* keep track of combined load across all CPUs */
++		total_load += load;
++
++		/* keep track of highest single load across all CPUs */
++		if (load > max_load)
++			max_load = load;
++	}
++
++	/* calculate the average load across all related CPUs */
++	avg_load = total_load / num_online_cpus();
++
++
++	/*
++	 * hotplug load accounting
++	 * average load over multiple sampling periods
++	 */
++
++	/* how many sampling periods do we use for hotplug decisions? */
++	periods = max(dbs_tuners_ins.hotplug_in_sampling_periods,
++			dbs_tuners_ins.hotplug_out_sampling_periods);
++
++	/* store avg_load in the circular buffer */
++	dbs_tuners_ins.hotplug_load_history[dbs_tuners_ins.hotplug_load_index]
++		= avg_load;
++
++	/* compute average load across in & out sampling periods */
++	for (i = 0, j = dbs_tuners_ins.hotplug_load_index;
++			i < periods; i++, j--) {
++		if (i < dbs_tuners_ins.hotplug_in_sampling_periods)
++			hotplug_in_avg_load +=
++				dbs_tuners_ins.hotplug_load_history[j];
++		if (i < dbs_tuners_ins.hotplug_out_sampling_periods)
++			hotplug_out_avg_load +=
++				dbs_tuners_ins.hotplug_load_history[j];
++
++		if (j == 0)
++			j = periods;
++	}
++
++	hotplug_in_avg_load = hotplug_in_avg_load /
++		dbs_tuners_ins.hotplug_in_sampling_periods;
++
++	hotplug_out_avg_load = hotplug_out_avg_load /
++		dbs_tuners_ins.hotplug_out_sampling_periods;
++
++	/* return to first element if we're at the circular buffer's end */
++	if (++dbs_tuners_ins.hotplug_load_index == periods)
++		dbs_tuners_ins.hotplug_load_index = 0;
++
++	/* check for frequency increase */
++	if (avg_load > dbs_tuners_ins.up_threshold) {
++		/* should we enable auxiliary CPUs? */
++		if (num_online_cpus() < 2 && hotplug_in_avg_load >
++				dbs_tuners_ins.up_threshold) {
++			/* hotplug with cpufreq is nasty;
++			 * a call to cpufreq_governor_dbs may cause a lockup.
++			 * wq is not running here so it's safe.
++			 */
++			mutex_unlock(&this_dbs_info->timer_mutex);
++			cpu_up(1);
++			mutex_lock(&this_dbs_info->timer_mutex);
++			goto out;
++		}
++
++		/* increase to highest frequency supported */
++		if (policy->cur < policy->max)
++			__cpufreq_driver_target(policy, policy->max,
++					CPUFREQ_RELATION_H);
++
++		goto out;
++	}
++
++	/* check for frequency decrease */
++	if (avg_load < dbs_tuners_ins.down_threshold) {
++		/* are we at the minimum frequency already? */
++		if (policy->cur == policy->min) {
++			/* should we disable auxiliary CPUs? */
++			if (num_online_cpus() > 1 && hotplug_out_avg_load <
++					dbs_tuners_ins.down_threshold) {
++				mutex_unlock(&this_dbs_info->timer_mutex);
++				cpu_down(1);
++				mutex_lock(&this_dbs_info->timer_mutex);
++			}
++			goto out;
++		}
++
++		/* bump down to the next lowest frequency in the table */
++		if (cpufreq_frequency_table_next_lowest(policy,
++					this_dbs_info->freq_table, &index)) {
++			pr_err("%s: failed to get next lowest frequency\n",
++					__func__);
++			goto out;
++		}
++
++		__cpufreq_driver_target(policy,
++				this_dbs_info->freq_table[index].frequency,
++				CPUFREQ_RELATION_L);
++	}
++out:
++	return;
++}
++
++static void do_dbs_timer(struct work_struct *work)
++{
++	struct cpu_dbs_info_s *dbs_info =
++		container_of(work, struct cpu_dbs_info_s, work.work);
++	unsigned int cpu = dbs_info->cpu;
++
++	/* We want all related CPUs to do sampling nearly on the same jiffy */
++	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
++
++	mutex_lock(&dbs_info->timer_mutex);
++	dbs_check_cpu(dbs_info);
++	queue_delayed_work_on(cpu, khotplug_wq, &dbs_info->work, delay);
++	mutex_unlock(&dbs_info->timer_mutex);
++}
++
++static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
++{
++	/* We want all related CPUs to do sampling nearly on the same jiffy */
++	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
++	delay -= jiffies % delay;
++
++	INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
++	queue_delayed_work_on(dbs_info->cpu, khotplug_wq, &dbs_info->work,
++			delay);
++}
++
++static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
++{
++	cancel_delayed_work_sync(&dbs_info->work);
++}
++
++static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
++				unsigned int event)
++{
++	unsigned int cpu = policy->cpu;
++	struct cpu_dbs_info_s *this_dbs_info;
++	unsigned int i, j, max_periods;
++	int rc;
++
++	this_dbs_info = &per_cpu(hp_cpu_dbs_info, cpu);
++
++	switch (event) {
++	case CPUFREQ_GOV_START:
++		if ((!cpu_online(cpu)) || (!policy->cur))
++			return -EINVAL;
++
++		mutex_lock(&dbs_mutex);
++		dbs_enable++;
++		for_each_cpu(j, policy->cpus) {
++			struct cpu_dbs_info_s *j_dbs_info;
++			j_dbs_info = &per_cpu(hp_cpu_dbs_info, j);
++			j_dbs_info->cur_policy = policy;
++
++			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
++					&j_dbs_info->prev_cpu_wall);
++			if (dbs_tuners_ins.ignore_nice) {
++				j_dbs_info->prev_cpu_nice =
++					kstat_cpu(j).cpustat.nice;
++			}
++
++			max_periods = max(DEFAULT_HOTPLUG_IN_SAMPLING_PERIODS,
++					DEFAULT_HOTPLUG_OUT_SAMPLING_PERIODS);
++			dbs_tuners_ins.hotplug_load_history = kmalloc(
++					(sizeof(unsigned int) * max_periods),
++					GFP_KERNEL);
++			if (!dbs_tuners_ins.hotplug_load_history) {
++				WARN_ON(1);
++				return -ENOMEM;
++			}
++			for (i = 0; i < max_periods; i++)
++				dbs_tuners_ins.hotplug_load_history[i] = 50;
++		}
++		this_dbs_info->cpu = cpu;
++		this_dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
++		/*
++		 * Start the timer schedule work when this governor
++		 * is used for the first time
++		 */
++		if (dbs_enable == 1) {
++			rc = sysfs_create_group(cpufreq_global_kobject,
++					&dbs_attr_group);
++			if (rc) {
++				mutex_unlock(&dbs_mutex);
++				return rc;
++			}
++		}
++		mutex_unlock(&dbs_mutex);
++
++		mutex_init(&this_dbs_info->timer_mutex);
++		dbs_timer_init(this_dbs_info);
++		break;
++
++	case CPUFREQ_GOV_STOP:
++		dbs_timer_exit(this_dbs_info);
++
++		mutex_lock(&dbs_mutex);
++		mutex_destroy(&this_dbs_info->timer_mutex);
++		dbs_enable--;
++		mutex_unlock(&dbs_mutex);
++		if (!dbs_enable)
++			sysfs_remove_group(cpufreq_global_kobject,
++					&dbs_attr_group);
++
++		kfree(dbs_tuners_ins.hotplug_load_history);
++		/*
++		 * XXX BIG CAVEAT: Stopping the governor with CPU1 offline
++		 * will result in it remaining offline until the user onlines
++		 * it again. It is up to the user to do this (for now).
++		 */
++		break;
++
++	case CPUFREQ_GOV_LIMITS:
++		mutex_lock(&this_dbs_info->timer_mutex);
++		if (policy->max < this_dbs_info->cur_policy->cur)
++			__cpufreq_driver_target(this_dbs_info->cur_policy,
++					policy->max, CPUFREQ_RELATION_H);
++		else if (policy->min > this_dbs_info->cur_policy->cur)
++			__cpufreq_driver_target(this_dbs_info->cur_policy,
++					policy->min, CPUFREQ_RELATION_L);
++		mutex_unlock(&this_dbs_info->timer_mutex);
++		break;
++	}
++	return 0;
++}
++
++static int __init cpufreq_gov_dbs_init(void)
++{
++	int err;
++	cputime64_t wall;
++	u64 idle_time;
++	int cpu = get_cpu();
++
++	idle_time = get_cpu_idle_time_us(cpu, &wall);
++	put_cpu();
++	if (idle_time != -1ULL) {
++		dbs_tuners_ins.up_threshold = DEFAULT_UP_FREQ_MIN_LOAD;
++	} else {
++		pr_err("cpufreq-hotplug: %s: assumes CONFIG_NO_HZ\n",
++				__func__);
++		return -EINVAL;
++	}
++
++	khotplug_wq = create_workqueue("khotplug");
++	if (!khotplug_wq) {
++		pr_err("Creation of khotplug failed\n");
++		return -EFAULT;
++	}
++	err = cpufreq_register_governor(&cpufreq_gov_hotplug);
++	if (err)
++		destroy_workqueue(khotplug_wq);
++
++	return err;
++}
++
++static void __exit cpufreq_gov_dbs_exit(void)
++{
++	cpufreq_unregister_governor(&cpufreq_gov_hotplug);
++	destroy_workqueue(khotplug_wq);
++}
++
++MODULE_AUTHOR("Mike Turquette <mturquette@ti.com>");
++MODULE_DESCRIPTION("'cpufreq_hotplug' - cpufreq governor for dynamic frequency scaling and CPU hotplugging");
++MODULE_LICENSE("GPL");
++
++#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_HOTPLUG
++fs_initcall(cpufreq_gov_dbs_init);
++#else
++module_init(cpufreq_gov_dbs_init);
++#endif
++module_exit(cpufreq_gov_dbs_exit);
+diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
+index a38fca8..6cbc3df 100644
+--- a/include/linux/cpufreq.h
++++ b/include/linux/cpufreq.h
+@@ -355,6 +355,9 @@ extern struct cpufreq_governor cpufreq_gov_ondemand;
+ #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE)
+ extern struct cpufreq_governor cpufreq_gov_conservative;
+ #define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_conservative)
++#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_HOTPLUG)
++extern struct cpufreq_governor cpufreq_gov_hotplug;
++#define CPUFREQ_DEFAULT_GOVERNOR	(&cpufreq_gov_hotplug)
+ #endif
+ 
+ 
+-- 
+1.6.6.1
+
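Once a kernel with this patch is running, the tunables described above are exposed by the "hotplug" attribute group that the patch registers on cpufreq_global_kobject, i.e. under /sys/devices/system/cpu/cpufreq/hotplug/. The snippet below is a minimal user-space sketch of how those knobs might be set; the write_tunable() helper is illustrative only (not part of the patch), and the values written are simply the driver's compiled-in defaults.

/*
 * Illustrative only: set the "hotplug" governor tunables from user space.
 * Assumes the sysfs files created by dbs_attr_group in the patch above.
 */
#include <stdio.h>

static int write_tunable(const char *name, const char *value)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpufreq/hotplug/%s", name);
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%s\n", value);
	fclose(f);
	return 0;
}

int main(void)
{
	write_tunable("sampling_rate", "100000");	/* DEFAULT_SAMPLING_PERIOD, in uSec */
	write_tunable("up_threshold", "80");		/* DEFAULT_UP_FREQ_MIN_LOAD */
	write_tunable("down_threshold", "20");		/* DEFAULT_DOWN_FREQ_MAX_LOAD */
	write_tunable("hotplug_in_sampling_periods", "5");	/* periods averaged before cpu_up(1) */
	write_tunable("hotplug_out_sampling_periods", "20");	/* periods averaged before cpu_down(1) */
	return 0;
}

The governor itself is selected per policy in the usual cpufreq way, e.g. by writing "hotplug" to /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor. Note that the store functions above reject an up_threshold at or below down_threshold (and vice versa), so when changing both, order the writes so the invariant holds at every step.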