path: root/extras/recipes-kernel/linux/linux-omap-2.6.39/pm/linux-omap-2.6.39-ti-pm-wip-cpufreq-hotplug/0002-cpufreq-introduce-hotplug-governor.patch
Diffstat:
 extras/recipes-kernel/linux/linux-omap-2.6.39/pm/linux-omap-2.6.39-ti-pm-wip-cpufreq-hotplug/0002-cpufreq-introduce-hotplug-governor.patch | 879 +++
 1 file changed, 879 insertions(+), 0 deletions(-)
diff --git a/extras/recipes-kernel/linux/linux-omap-2.6.39/pm/linux-omap-2.6.39-ti-pm-wip-cpufreq-hotplug/0002-cpufreq-introduce-hotplug-governor.patch b/extras/recipes-kernel/linux/linux-omap-2.6.39/pm/linux-omap-2.6.39-ti-pm-wip-cpufreq-hotplug/0002-cpufreq-introduce-hotplug-governor.patch
new file mode 100644
index 00000000..731906cc
--- /dev/null
+++ b/extras/recipes-kernel/linux/linux-omap-2.6.39/pm/linux-omap-2.6.39-ti-pm-wip-cpufreq-hotplug/0002-cpufreq-introduce-hotplug-governor.patch
@@ -0,0 +1,879 @@
+From e4c777d8314d7925e4895f00b3a7ebd64a4d830b Mon Sep 17 00:00:00 2001
+From: Mike Turquette <mturquette@ti.com>
+Date: Tue, 17 May 2011 09:43:09 -0500
+Subject: [PATCH 2/2] cpufreq: introduce hotplug governor
+
+The "hotplug" governor scales CPU frequency based on load, similar to
+"ondemand". It scales up to the highest frequency when "up_threshold"
+is crossed and scales down one frequency at a time when "down_threshold"
+is crossed. Unlike those governors, target frequencies are determined
+by directly accessing the CPUfreq frequency table, instead of taking
+some percentage of maximum available frequency.
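+
+For illustration, choosing the next step down from a frequency table
+amounts to something like the following sketch (simplified; the patch
+itself uses the cpufreq_frequency_table_next_lowest() helper):
+
+    struct freq_entry { unsigned int frequency; };
+
+    /* return the largest table frequency strictly below cur, or 0 */
+    static unsigned int next_lowest(const struct freq_entry *t, int len,
+                                    unsigned int cur)
+    {
+            unsigned int best = 0;
+            int i;
+
+            for (i = 0; i < len; i++)
+                    if (t[i].frequency < cur && t[i].frequency > best)
+                            best = t[i].frequency;
+
+            return best;
+    }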
+
+The key difference in the "hotplug" governor is that it will disable
+auxiliary CPUs when the system is very idle, and enable them again once
+the system becomes busy. This is achieved by averaging load over
+multiple sampling periods; if CPUs were onlined or offlined based on a
+single sampling period then thrashing would occur.
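+
+A minimal model of that averaging (illustrative only; assumes the number
+of averaged periods never exceeds the 20-entry history):
+
+    static unsigned int history[20];   /* one entry per sampling period */
+    static unsigned int idx;           /* circular-buffer write position */
+
+    /* record this period's load, then average the n most recent periods */
+    static unsigned int record_and_average(unsigned int load, unsigned int n)
+    {
+            unsigned int i, j, sum = 0;
+
+            history[idx] = load;
+            for (i = 0, j = idx; i < n; i++) {
+                    sum += history[j];
+                    j = j ? j - 1 : 19;     /* wrap to the buffer's end */
+            }
+            idx = (idx + 1) % 20;
+            return sum / n;
+    }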
+
+Sysfs entries exist for "hotplug_in_sampling_periods" and for
+"hotplug_out_sampling_periods", which determine how many consecutive
+periods get averaged to decide whether auxiliary CPUs should be onlined
+or offlined. Defaults are 5 periods and 20 periods respectively.
+Otherwise the standard sysfs entries found in the "ondemand" and
+"conservative" governors are present.
+
+To use this governor, your CPUfreq driver must have populated the
+CPUfreq frequency table, and both CONFIG_NO_HZ and CONFIG_HOTPLUG_CPU
+must be enabled.
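+
+The first requirement is normally met in the platform driver's init
+hook, along these lines (sketch only; the table, its frequencies and
+the my_driver_init() name are made up):
+
+    static struct cpufreq_frequency_table my_table[] = {
+            { .index = 0, .frequency = 300000 },   /* kHz */
+            { .index = 1, .frequency = 600000 },
+            { .index = 2, .frequency = 1000000 },
+            { .index = 3, .frequency = CPUFREQ_TABLE_END },
+    };
+
+    static int my_driver_init(struct cpufreq_policy *policy)
+    {
+            /* validate the table against the policy and publish it */
+            int ret = cpufreq_frequency_table_cpuinfo(policy, my_table);
+
+            if (!ret)
+                    cpufreq_frequency_table_get_attr(my_table, policy->cpu);
+            return ret;
+    }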
+
+Changes in V2:
+ Corrected default sampling periods
+ Optimized load history array resizing
+ Maintain load history when resizing array
+ Add locking to dbs_check_cpu
+ Switch from enable_nonboot_cpus to cpu_up
+ Switch from disable_nonboot_cpus to cpu_down
+ Fix some printks
+ Coding style around for-loops
+
+Signed-off-by: Mike Turquette <mturquette@ti.com>
+Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
+Signed-off-by: Koen Kooi <koen@dominion.thruhere.net>
+---
+ Documentation/cpu-freq/governors.txt | 28 ++
+ drivers/cpufreq/Kconfig | 33 ++
+ drivers/cpufreq/Makefile | 1 +
+ drivers/cpufreq/cpufreq_hotplug.c | 705 ++++++++++++++++++++++++++++++++++
+ include/linux/cpufreq.h | 3 +
+ 5 files changed, 770 insertions(+), 0 deletions(-)
+ create mode 100644 drivers/cpufreq/cpufreq_hotplug.c
+
+diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
+index e74d0a2..c2e3d3d 100644
+--- a/Documentation/cpu-freq/governors.txt
++++ b/Documentation/cpu-freq/governors.txt
+@@ -193,6 +193,34 @@ governor but for the opposite direction. For example when set to its
+ default value of '20' it means that if the CPU usage needs to be below
+ 20% between samples to have the frequency decreased.
+
++
++2.6 Hotplug
++-----------
++
++The CPUfreq governor "hotplug" operates similarly to "ondemand" and
++"conservative". Its decisions are based primarily on CPU load. Like
++"ondemand" the "hotplug" governor will ramp up to the highest frequency
++once the run-time tunable "up_threshold" parameter is crossed. Like
++"conservative", the "hotplug" governor exports a "down_threshold"
++parameter that is also tunable at run-time. When the "down_threshold"
++is crossed the CPU transitions to the next lowest frequency in the
++CPUfreq frequency table instead of decrementing the frequency based on a
++percentage of maximum load.
++
++The main reason the "hotplug" governor exists is for architectures requiring
++that only the master CPU be online in order to hit low-power states
++(C-states). OMAP4 is one such example. The "hotplug" governor
++is also helpful in reducing thermal output in devices with tight thermal
++constraints.
++
++Auxiliary CPUs are onlined/offlined based on CPU load, but the decision
++to do so is made after averaging several sampling windows. This is to
++reduce CPU hotplug "thrashing", which can be caused by normal system
++entropy and leads to many spurious plug-in and plug-out transitions.
++The number of sampling periods averaged together is controlled by the
++"hotplug_in_sampling_periods" and "hotplug_out_sampling_periods"
++run-time tunable parameters.
++
+ 3. The Governor Interface in the CPUfreq Core
+ =============================================
+
+diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
+index ca8ee80..c716a0e 100644
+--- a/drivers/cpufreq/Kconfig
++++ b/drivers/cpufreq/Kconfig
+@@ -110,6 +110,19 @@ config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
+ Be aware that not all cpufreq drivers support the conservative
+ governor. If unsure have a look at the help section of the
+ driver. Fallback governor will be the performance governor.
++
++config CPU_FREQ_DEFAULT_GOV_HOTPLUG
++ bool "hotplug"
++ select CPU_FREQ_GOV_HOTPLUG
++ select CPU_FREQ_GOV_PERFORMANCE
++ help
++ Use the CPUFreq governor 'hotplug' as default. This allows you
++ to get a full dynamic frequency capable system with CPU
++ hotplug support by simply loading your cpufreq low-level
++ hardware driver. Be aware that not all cpufreq drivers
++ support the hotplug governor. If unsure have a look at
++ the help section of the driver. Fallback governor will be the
++ performance governor.
+ endchoice
+
+ config CPU_FREQ_GOV_PERFORMANCE
+@@ -190,4 +203,24 @@ config CPU_FREQ_GOV_CONSERVATIVE
+
+ If in doubt, say N.
+
++config CPU_FREQ_GOV_HOTPLUG
++ tristate "'hotplug' cpufreq governor"
++ depends on CPU_FREQ && NO_HZ && HOTPLUG_CPU
++ help
++ 'hotplug' - this driver mimics the frequency scaling behavior
++ in 'ondemand', but with several key differences. First is
++ that frequency transitions use the CPUFreq table directly,
++ instead of incrementing in a percentage of the maximum
++ available frequency. Second, 'hotplug' will offline auxiliary
++ CPUs when the system is idle, and online those CPUs once the
++ system becomes busy again. This last feature is needed for
++ architectures which transition to low power states when only
++ the "master" CPU is online, or for thermally constrained
++ devices.
++
++ If you don't have one of these architectures or devices, use
++ 'ondemand' instead.
++
++ If in doubt, say N.
++
+ endif # CPU_FREQ
+diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
+index 71fc3b4..05d564c 100644
+--- a/drivers/cpufreq/Makefile
++++ b/drivers/cpufreq/Makefile
+@@ -9,6 +9,7 @@ obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o
+ obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o
+ obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o
+ obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o
++obj-$(CONFIG_CPU_FREQ_GOV_HOTPLUG) += cpufreq_hotplug.o
+
+ # CPUfreq cross-arch helpers
+ obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o
+diff --git a/drivers/cpufreq/cpufreq_hotplug.c b/drivers/cpufreq/cpufreq_hotplug.c
+new file mode 100644
+index 0000000..85aa6d2
+--- /dev/null
++++ b/drivers/cpufreq/cpufreq_hotplug.c
+@@ -0,0 +1,705 @@
++/*
++ * CPUFreq hotplug governor
++ *
++ * Copyright (C) 2010 Texas Instruments, Inc.
++ * Mike Turquette <mturquette@ti.com>
++ * Santosh Shilimkar <santosh.shilimkar@ti.com>
++ *
++ * Based on ondemand governor
++ * Copyright (C) 2001 Russell King
++ * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>,
++ * Jun Nakajima <jun.nakajima@intel.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/cpufreq.h>
++#include <linux/cpu.h>
++#include <linux/jiffies.h>
++#include <linux/kernel_stat.h>
++#include <linux/mutex.h>
++#include <linux/hrtimer.h>
++#include <linux/tick.h>
++#include <linux/ktime.h>
++#include <linux/sched.h>
++#include <linux/err.h>
++#include <linux/slab.h>
++
++/* greater than 80% avg load across online CPUs increases frequency */
++#define DEFAULT_UP_FREQ_MIN_LOAD (80)
++
++/* less than 20% avg load across online CPUs decreases frequency */
++#define DEFAULT_DOWN_FREQ_MAX_LOAD (20)
++
++/* default sampling period (uSec) is bogus; 10x ondemand's default for x86 */
++#define DEFAULT_SAMPLING_PERIOD (100000)
++
++/* default number of sampling periods to average before hotplug-in decision */
++#define DEFAULT_HOTPLUG_IN_SAMPLING_PERIODS (5)
++
++/* default number of sampling periods to average before hotplug-out decision */
++#define DEFAULT_HOTPLUG_OUT_SAMPLING_PERIODS (20)
++
++static void do_dbs_timer(struct work_struct *work);
++static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
++ unsigned int event);
++
++#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_HOTPLUG
++static
++#endif
++struct cpufreq_governor cpufreq_gov_hotplug = {
++ .name = "hotplug",
++ .governor = cpufreq_governor_dbs,
++ .owner = THIS_MODULE,
++};
++
++struct cpu_dbs_info_s {
++ cputime64_t prev_cpu_idle;
++ cputime64_t prev_cpu_wall;
++ cputime64_t prev_cpu_nice;
++ struct cpufreq_policy *cur_policy;
++ struct delayed_work work;
++ struct cpufreq_frequency_table *freq_table;
++ int cpu;
++ /*
++ * percpu mutex that serializes governor limit change with
++ * do_dbs_timer invocation. We do not want do_dbs_timer to run
++ * when user is changing the governor or limits.
++ */
++ struct mutex timer_mutex;
++};
++static DEFINE_PER_CPU(struct cpu_dbs_info_s, hp_cpu_dbs_info);
++
++static unsigned int dbs_enable; /* number of CPUs using this policy */
++
++/*
++ * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on
++ * different CPUs. It protects dbs_enable in governor start/stop.
++ */
++static DEFINE_MUTEX(dbs_mutex);
++
++static struct workqueue_struct *khotplug_wq;
++
++static struct dbs_tuners {
++ unsigned int sampling_rate;
++ unsigned int up_threshold;
++ unsigned int down_threshold;
++ unsigned int hotplug_in_sampling_periods;
++ unsigned int hotplug_out_sampling_periods;
++ unsigned int hotplug_load_index;
++ unsigned int *hotplug_load_history;
++ unsigned int ignore_nice;
++ unsigned int io_is_busy;
++} dbs_tuners_ins = {
++ .sampling_rate = DEFAULT_SAMPLING_PERIOD,
++ .up_threshold = DEFAULT_UP_FREQ_MIN_LOAD,
++ .down_threshold = DEFAULT_DOWN_FREQ_MAX_LOAD,
++ .hotplug_in_sampling_periods = DEFAULT_HOTPLUG_IN_SAMPLING_PERIODS,
++ .hotplug_out_sampling_periods = DEFAULT_HOTPLUG_OUT_SAMPLING_PERIODS,
++ .hotplug_load_index = 0,
++ .ignore_nice = 0,
++ .io_is_busy = 0,
++};
++
++/*
++ * A corner case exists when switching io_is_busy at run-time: comparing idle
++ * times from a non-io_is_busy period to an io_is_busy period (or vice-versa)
++ * will misrepresent the actual change in system idleness. We ignore this
++ * corner case: enabling io_is_busy might cause freq increase and disabling
++ * might cause freq decrease, which probably matches the original intent.
++ */
++static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
++{
++ u64 idle_time;
++ u64 iowait_time;
++
++ /* cpufreq-hotplug always assumes CONFIG_NO_HZ */
++ idle_time = get_cpu_idle_time_us(cpu, wall);
++
++ /* add time spent doing I/O to idle time */
++ if (dbs_tuners_ins.io_is_busy) {
++ iowait_time = get_cpu_iowait_time_us(cpu, wall);
++ /* cpufreq-hotplug always assumes CONFIG_NO_HZ */
++ if (iowait_time != -1ULL && idle_time >= iowait_time)
++ idle_time -= iowait_time;
++ }
++
++ return idle_time;
++}
++
++/************************** sysfs interface ************************/
++
++/* XXX look at global sysfs macros in cpufreq.h, can those be used here? */
++
++/* cpufreq_hotplug Governor Tunables */
++#define show_one(file_name, object) \
++static ssize_t show_##file_name \
++(struct kobject *kobj, struct attribute *attr, char *buf) \
++{ \
++ return sprintf(buf, "%u\n", dbs_tuners_ins.object); \
++}
++show_one(sampling_rate, sampling_rate);
++show_one(up_threshold, up_threshold);
++show_one(down_threshold, down_threshold);
++show_one(hotplug_in_sampling_periods, hotplug_in_sampling_periods);
++show_one(hotplug_out_sampling_periods, hotplug_out_sampling_periods);
++show_one(ignore_nice_load, ignore_nice);
++show_one(io_is_busy, io_is_busy);
++
++static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
++ const char *buf, size_t count)
++{
++ unsigned int input;
++ int ret;
++ ret = sscanf(buf, "%u", &input);
++ if (ret != 1)
++ return -EINVAL;
++
++ mutex_lock(&dbs_mutex);
++ dbs_tuners_ins.sampling_rate = input;
++ mutex_unlock(&dbs_mutex);
++
++ return count;
++}
++
++static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
++ const char *buf, size_t count)
++{
++ unsigned int input;
++ int ret;
++ ret = sscanf(buf, "%u", &input);
++
++ if (ret != 1 || input <= dbs_tuners_ins.down_threshold) {
++ return -EINVAL;
++ }
++
++ mutex_lock(&dbs_mutex);
++ dbs_tuners_ins.up_threshold = input;
++ mutex_unlock(&dbs_mutex);
++
++ return count;
++}
++
++static ssize_t store_down_threshold(struct kobject *a, struct attribute *b,
++ const char *buf, size_t count)
++{
++ unsigned int input;
++ int ret;
++ ret = sscanf(buf, "%u", &input);
++
++ if (ret != 1 || input >= dbs_tuners_ins.up_threshold) {
++ return -EINVAL;
++ }
++
++ mutex_lock(&dbs_mutex);
++ dbs_tuners_ins.down_threshold = input;
++ mutex_unlock(&dbs_mutex);
++
++ return count;
++}
++
++static ssize_t store_hotplug_in_sampling_periods(struct kobject *a,
++ struct attribute *b, const char *buf, size_t count)
++{
++ unsigned int input;
++ unsigned int *temp;
++ unsigned int max_windows;
++ int ret;
++ ret = sscanf(buf, "%u", &input);
++
++ if (ret != 1)
++ return -EINVAL;
++
++ /* already using this value, bail out */
++ if (input == dbs_tuners_ins.hotplug_in_sampling_periods)
++ return count;
++
++ mutex_lock(&dbs_mutex);
++ ret = count;
++ max_windows = max(dbs_tuners_ins.hotplug_in_sampling_periods,
++ dbs_tuners_ins.hotplug_out_sampling_periods);
++
++ /* no need to resize array */
++ if (input <= max_windows) {
++ dbs_tuners_ins.hotplug_in_sampling_periods = input;
++ goto out;
++ }
++
++ /* resize array */
++ temp = kmalloc((sizeof(unsigned int) * input), GFP_KERNEL);
++
++ if (!temp) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ memcpy(temp, dbs_tuners_ins.hotplug_load_history,
++ (max_windows * sizeof(unsigned int)));
++ kfree(dbs_tuners_ins.hotplug_load_history);
++
++ /* replace old buffer, old number of sampling periods & old index */
++ dbs_tuners_ins.hotplug_load_history = temp;
++ dbs_tuners_ins.hotplug_in_sampling_periods = input;
++ dbs_tuners_ins.hotplug_load_index = max_windows;
++out:
++ mutex_unlock(&dbs_mutex);
++
++ return ret;
++}
++
++static ssize_t store_hotplug_out_sampling_periods(struct kobject *a,
++ struct attribute *b, const char *buf, size_t count)
++{
++ unsigned int input;
++ unsigned int *temp;
++ unsigned int max_windows;
++ int ret;
++ ret = sscanf(buf, "%u", &input);
++
++ if (ret != 1)
++ return -EINVAL;
++
++ /* already using this value, bail out */
++ if (input == dbs_tuners_ins.hotplug_out_sampling_periods)
++ return count;
++
++ mutex_lock(&dbs_mutex);
++ ret = count;
++ max_windows = max(dbs_tuners_ins.hotplug_in_sampling_periods,
++ dbs_tuners_ins.hotplug_out_sampling_periods);
++
++ /* no need to resize array */
++ if (input <= max_windows) {
++ dbs_tuners_ins.hotplug_out_sampling_periods = input;
++ goto out;
++ }
++
++ /* resize array */
++ temp = kmalloc((sizeof(unsigned int) * input), GFP_KERNEL);
++
++ if (!temp) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ memcpy(temp, dbs_tuners_ins.hotplug_load_history,
++ (max_windows * sizeof(unsigned int)));
++ kfree(dbs_tuners_ins.hotplug_load_history);
++
++ /* replace old buffer, old number of sampling periods & old index */
++ dbs_tuners_ins.hotplug_load_history = temp;
++ dbs_tuners_ins.hotplug_out_sampling_periods = input;
++ dbs_tuners_ins.hotplug_load_index = max_windows;
++out:
++ mutex_unlock(&dbs_mutex);
++
++ return ret;
++}
++
++static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
++ const char *buf, size_t count)
++{
++ unsigned int input;
++ int ret;
++
++ unsigned int j;
++
++ ret = sscanf(buf, "%u", &input);
++ if (ret != 1)
++ return -EINVAL;
++
++ if (input > 1)
++ input = 1;
++
++ mutex_lock(&dbs_mutex);
++ if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
++ mutex_unlock(&dbs_mutex);
++ return count;
++ }
++ dbs_tuners_ins.ignore_nice = input;
++
++ /* we need to re-evaluate prev_cpu_idle */
++ for_each_online_cpu(j) {
++ struct cpu_dbs_info_s *dbs_info;
++ dbs_info = &per_cpu(hp_cpu_dbs_info, j);
++ dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
++ &dbs_info->prev_cpu_wall);
++ if (dbs_tuners_ins.ignore_nice)
++ dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
++
++ }
++ mutex_unlock(&dbs_mutex);
++
++ return count;
++}
++
++static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b,
++ const char *buf, size_t count)
++{
++ unsigned int input;
++ int ret;
++
++ ret = sscanf(buf, "%u", &input);
++ if (ret != 1)
++ return -EINVAL;
++
++ mutex_lock(&dbs_mutex);
++ dbs_tuners_ins.io_is_busy = !!input;
++ mutex_unlock(&dbs_mutex);
++
++ return count;
++}
++
++define_one_global_rw(sampling_rate);
++define_one_global_rw(up_threshold);
++define_one_global_rw(down_threshold);
++define_one_global_rw(hotplug_in_sampling_periods);
++define_one_global_rw(hotplug_out_sampling_periods);
++define_one_global_rw(ignore_nice_load);
++define_one_global_rw(io_is_busy);
++
++static struct attribute *dbs_attributes[] = {
++ &sampling_rate.attr,
++ &up_threshold.attr,
++ &down_threshold.attr,
++ &hotplug_in_sampling_periods.attr,
++ &hotplug_out_sampling_periods.attr,
++ &ignore_nice_load.attr,
++ &io_is_busy.attr,
++ NULL
++};
++
++static struct attribute_group dbs_attr_group = {
++ .attrs = dbs_attributes,
++ .name = "hotplug",
++};
++
++/************************** sysfs end ************************/
++
++static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
++{
++ /* combined load of all enabled CPUs */
++ unsigned int total_load = 0;
++ /* single largest CPU load */
++ unsigned int max_load = 0;
++ /* average load across all enabled CPUs */
++ unsigned int avg_load = 0;
++ /* average load across multiple sampling periods for hotplug events */
++ unsigned int hotplug_in_avg_load = 0;
++ unsigned int hotplug_out_avg_load = 0;
++ /* number of sampling periods averaged for hotplug decisions */
++ unsigned int periods;
++
++ struct cpufreq_policy *policy;
++ unsigned int index = 0;
++ unsigned int i, j;
++
++ policy = this_dbs_info->cur_policy;
++
++ /*
++ * cpu load accounting
++ * get highest load, total load and average load across all CPUs
++ */
++ for_each_cpu(j, policy->cpus) {
++ unsigned int load;
++ unsigned int idle_time, wall_time;
++ cputime64_t cur_wall_time, cur_idle_time;
++ struct cpu_dbs_info_s *j_dbs_info;
++
++ j_dbs_info = &per_cpu(hp_cpu_dbs_info, j);
++
++ /* update both cur_idle_time and cur_wall_time */
++ cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
++
++ /* how much wall time has passed since last iteration? */
++ wall_time = (unsigned int) cputime64_sub(cur_wall_time,
++ j_dbs_info->prev_cpu_wall);
++ j_dbs_info->prev_cpu_wall = cur_wall_time;
++
++ /* how much idle time has passed since last iteration? */
++ idle_time = (unsigned int) cputime64_sub(cur_idle_time,
++ j_dbs_info->prev_cpu_idle);
++ j_dbs_info->prev_cpu_idle = cur_idle_time;
++
++ if (unlikely(!wall_time || wall_time < idle_time))
++ continue;
++
++ /* load is the percentage of time not spent in idle */
++ load = 100 * (wall_time - idle_time) / wall_time;
++
++ /* keep track of combined load across all CPUs */
++ total_load += load;
++
++ /* keep track of highest single load across all CPUs */
++ if (load > max_load)
++ max_load = load;
++ }
++
++ /* calculate the average load across all related CPUs */
++ avg_load = total_load / num_online_cpus();
++
++
++ /*
++ * hotplug load accounting
++ * average load over multiple sampling periods
++ */
++
++ /* how many sampling periods do we use for hotplug decisions? */
++ periods = max(dbs_tuners_ins.hotplug_in_sampling_periods,
++ dbs_tuners_ins.hotplug_out_sampling_periods);
++
++ /* store avg_load in the circular buffer */
++ dbs_tuners_ins.hotplug_load_history[dbs_tuners_ins.hotplug_load_index]
++ = avg_load;
++
++ /* compute average load across in & out sampling periods */
++ for (i = 0, j = dbs_tuners_ins.hotplug_load_index;
++ i < periods; i++, j--) {
++ if (i < dbs_tuners_ins.hotplug_in_sampling_periods)
++ hotplug_in_avg_load +=
++ dbs_tuners_ins.hotplug_load_history[j];
++ if (i < dbs_tuners_ins.hotplug_out_sampling_periods)
++ hotplug_out_avg_load +=
++ dbs_tuners_ins.hotplug_load_history[j];
++
++ if (j == 0)
++ j = periods;
++ }
++
++ hotplug_in_avg_load = hotplug_in_avg_load /
++ dbs_tuners_ins.hotplug_in_sampling_periods;
++
++ hotplug_out_avg_load = hotplug_out_avg_load /
++ dbs_tuners_ins.hotplug_out_sampling_periods;
++
++ /* return to first element if we're at the circular buffer's end */
++ if (++dbs_tuners_ins.hotplug_load_index == periods)
++ dbs_tuners_ins.hotplug_load_index = 0;
++
++ /* check for frequency increase */
++ if (avg_load > dbs_tuners_ins.up_threshold) {
++ /* should we enable auxiliary CPUs? */
++ if (num_online_cpus() < 2 && hotplug_in_avg_load >
++ dbs_tuners_ins.up_threshold) {
++ /* hotplug with cpufreq is nasty: a call to
++ * cpufreq_governor_dbs may cause a lockup.
++ * The wq is not running here, so it's safe.
++ */
++ mutex_unlock(&this_dbs_info->timer_mutex);
++ cpu_up(1);
++ mutex_lock(&this_dbs_info->timer_mutex);
++ goto out;
++ }
++
++ /* increase to highest frequency supported */
++ if (policy->cur < policy->max)
++ __cpufreq_driver_target(policy, policy->max,
++ CPUFREQ_RELATION_H);
++
++ goto out;
++ }
++
++ /* check for frequency decrease */
++ if (avg_load < dbs_tuners_ins.down_threshold) {
++ /* are we at the minimum frequency already? */
++ if (policy->cur == policy->min) {
++ /* should we disable auxiliary CPUs? */
++ if (num_online_cpus() > 1 && hotplug_out_avg_load <
++ dbs_tuners_ins.down_threshold) {
++ mutex_unlock(&this_dbs_info->timer_mutex);
++ cpu_down(1);
++ mutex_lock(&this_dbs_info->timer_mutex);
++ }
++ goto out;
++ }
++
++ /* bump down to the next lowest frequency in the table */
++ if (cpufreq_frequency_table_next_lowest(policy,
++ this_dbs_info->freq_table, &index)) {
++ pr_err("%s: failed to get next lowest frequency\n",
++ __func__);
++ goto out;
++ }
++
++ __cpufreq_driver_target(policy,
++ this_dbs_info->freq_table[index].frequency,
++ CPUFREQ_RELATION_L);
++ }
++out:
++ return;
++}
++
++static void do_dbs_timer(struct work_struct *work)
++{
++ struct cpu_dbs_info_s *dbs_info =
++ container_of(work, struct cpu_dbs_info_s, work.work);
++ unsigned int cpu = dbs_info->cpu;
++
++ /* We want all related CPUs to do sampling nearly on same jiffy */
++ int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
++
++ mutex_lock(&dbs_info->timer_mutex);
++ dbs_check_cpu(dbs_info);
++ queue_delayed_work_on(cpu, khotplug_wq, &dbs_info->work, delay);
++ mutex_unlock(&dbs_info->timer_mutex);
++}
++
++static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
++{
++ /* We want all related CPUs to do sampling nearly on same jiffy */
++ int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
++ delay -= jiffies % delay;
++
++ INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
++ queue_delayed_work_on(dbs_info->cpu, khotplug_wq, &dbs_info->work,
++ delay);
++}
++
++static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
++{
++ cancel_delayed_work_sync(&dbs_info->work);
++}
++
++static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
++ unsigned int event)
++{
++ unsigned int cpu = policy->cpu;
++ struct cpu_dbs_info_s *this_dbs_info;
++ unsigned int i, j, max_periods;
++ int rc;
++
++ this_dbs_info = &per_cpu(hp_cpu_dbs_info, cpu);
++
++ switch (event) {
++ case CPUFREQ_GOV_START:
++ if ((!cpu_online(cpu)) || (!policy->cur))
++ return -EINVAL;
++
++ mutex_lock(&dbs_mutex);
++ dbs_enable++;
++ for_each_cpu(j, policy->cpus) {
++ struct cpu_dbs_info_s *j_dbs_info;
++ j_dbs_info = &per_cpu(hp_cpu_dbs_info, j);
++ j_dbs_info->cur_policy = policy;
++
++ j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
++ &j_dbs_info->prev_cpu_wall);
++ if (dbs_tuners_ins.ignore_nice) {
++ j_dbs_info->prev_cpu_nice =
++ kstat_cpu(j).cpustat.nice;
++ }
++
++ max_periods = max(DEFAULT_HOTPLUG_IN_SAMPLING_PERIODS,
++ DEFAULT_HOTPLUG_OUT_SAMPLING_PERIODS);
++ dbs_tuners_ins.hotplug_load_history = kmalloc(
++ (sizeof(unsigned int) * max_periods),
++ GFP_KERNEL);
++ if (!dbs_tuners_ins.hotplug_load_history) {
++ mutex_unlock(&dbs_mutex);
++ return -ENOMEM;
++ }
++ for (i = 0; i < max_periods; i++)
++ dbs_tuners_ins.hotplug_load_history[i] = 50;
++ }
++ this_dbs_info->cpu = cpu;
++ this_dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
++ /*
++ * Start the timer / schedule the work when this governor
++ * is used for the first time
++ */
++ if (dbs_enable == 1) {
++ rc = sysfs_create_group(cpufreq_global_kobject,
++ &dbs_attr_group);
++ if (rc) {
++ mutex_unlock(&dbs_mutex);
++ return rc;
++ }
++ }
++ mutex_unlock(&dbs_mutex);
++
++ mutex_init(&this_dbs_info->timer_mutex);
++ dbs_timer_init(this_dbs_info);
++ break;
++
++ case CPUFREQ_GOV_STOP:
++ dbs_timer_exit(this_dbs_info);
++
++ mutex_lock(&dbs_mutex);
++ mutex_destroy(&this_dbs_info->timer_mutex);
++ dbs_enable--;
++ mutex_unlock(&dbs_mutex);
++ if (!dbs_enable)
++ sysfs_remove_group(cpufreq_global_kobject,
++ &dbs_attr_group);
++ kfree(dbs_tuners_ins.hotplug_load_history);
++ /*
++ * XXX BIG CAVEAT: Stopping the governor with CPU1 offline
++ * will result in it remaining offline until the user onlines
++ * it again. It is up to the user to do this (for now).
++ */
++ break;
++
++ case CPUFREQ_GOV_LIMITS:
++ mutex_lock(&this_dbs_info->timer_mutex);
++ if (policy->max < this_dbs_info->cur_policy->cur)
++ __cpufreq_driver_target(this_dbs_info->cur_policy,
++ policy->max, CPUFREQ_RELATION_H);
++ else if (policy->min > this_dbs_info->cur_policy->cur)
++ __cpufreq_driver_target(this_dbs_info->cur_policy,
++ policy->min, CPUFREQ_RELATION_L);
++ mutex_unlock(&this_dbs_info->timer_mutex);
++ break;
++ }
++ return 0;
++}
++
++static int __init cpufreq_gov_dbs_init(void)
++{
++ int err;
++ cputime64_t wall;
++ u64 idle_time;
++ int cpu = get_cpu();
++
++ idle_time = get_cpu_idle_time_us(cpu, &wall);
++ put_cpu();
++ if (idle_time != -1ULL) {
++ dbs_tuners_ins.up_threshold = DEFAULT_UP_FREQ_MIN_LOAD;
++ } else {
++ pr_err("cpufreq-hotplug: %s: assumes CONFIG_NO_HZ\n",
++ __func__);
++ return -EINVAL;
++ }
++
++ khotplug_wq = create_workqueue("khotplug");
++ if (!khotplug_wq) {
++ pr_err("Creation of khotplug failed\n");
++ return -ENOMEM;
++ }
++ err = cpufreq_register_governor(&cpufreq_gov_hotplug);
++ if (err)
++ destroy_workqueue(khotplug_wq);
++
++ return err;
++}
++
++static void __exit cpufreq_gov_dbs_exit(void)
++{
++ cpufreq_unregister_governor(&cpufreq_gov_hotplug);
++ destroy_workqueue(khotplug_wq);
++}
++
++MODULE_AUTHOR("Mike Turquette <mturquette@ti.com>");
++MODULE_DESCRIPTION("'cpufreq_hotplug' - cpufreq governor for dynamic frequency scaling and CPU hotplugging");
++MODULE_LICENSE("GPL");
++
++#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_HOTPLUG
++fs_initcall(cpufreq_gov_dbs_init);
++#else
++module_init(cpufreq_gov_dbs_init);
++#endif
++module_exit(cpufreq_gov_dbs_exit);
+diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
+index a38fca8..6cbc3df 100644
+--- a/include/linux/cpufreq.h
++++ b/include/linux/cpufreq.h
+@@ -355,6 +355,9 @@ extern struct cpufreq_governor cpufreq_gov_ondemand;
+ #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE)
+ extern struct cpufreq_governor cpufreq_gov_conservative;
+ #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_conservative)
++#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_HOTPLUG)
++extern struct cpufreq_governor cpufreq_gov_hotplug;
++#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_hotplug)
+ #endif
+
+
+--
+1.6.6.1
+