diff mbox

[v7,3/7] : x86: refactor x86 idle power management code and remove all instances of pm_idle.

Message ID 20091006153126.GA7358@linux.vnet.ibm.com (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Arun Bharadwaj Oct. 6, 2009, 3:31 p.m. UTC
* Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2009-10-06 20:54:21]:

This patch removes all instances of pm_idle from x86.

pm_idle, which was earlier called from the cpu_idle() idle loop,
is replaced by cpuidle_idle_call().

x86 also registers to cpuidle when the idle routine is selected,
by populating the cpuidle_device data structure for each cpu.

This is replicated for the apm module and for xen, which also used pm_idle.


Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
---
 arch/x86/kernel/apm_32.c     |   37 ++++++++++++++++++--
 arch/x86/kernel/process.c    |   79 ++++++++++++++++++++++++++++++++-----------
 arch/x86/kernel/process_32.c |    3 +
 arch/x86/kernel/process_64.c |    3 +
 arch/x86/xen/setup.c         |   22 +++++++++++
 5 files changed, 118 insertions(+), 26 deletions(-)

Comments

Peter Zijlstra Oct. 7, 2009, 2:45 p.m. UTC | #1
On Tue, 2009-10-06 at 21:01 +0530, Arun R Bharadwaj wrote:
> +++ linux.trees.git/arch/x86/kernel/process.c
> @@ -9,6 +9,7 @@
>  #include <linux/pm.h>
>  #include <linux/clockchips.h>
>  #include <linux/random.h>
> +#include <linux/cpuidle.h>
>  #include <trace/events/power.h>
>  #include <asm/system.h>
>  #include <asm/apic.h>
> @@ -244,12 +245,6 @@ int sys_vfork(struct pt_regs *regs)
>  unsigned long boot_option_idle_override = 0;
>  EXPORT_SYMBOL(boot_option_idle_override);
>  
> -/*
> - * Powermanagement idle function, if any..
> - */
> -void (*pm_idle)(void);
> -EXPORT_SYMBOL(pm_idle);
> -
>  #ifdef CONFIG_X86_32
>  /*
>   * This halt magic was a workaround for ancient floppy DMA
> @@ -329,17 +324,15 @@ static void do_nothing(void *unused)
>  }
>  
>  /*
> - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
> - * pm_idle and update to new pm_idle value. Required while changing pm_idle
> - * handler on SMP systems.
> + * cpu_idle_wait - Required while changing idle routine handler on SMP systems.
>   *
> - * Caller must have changed pm_idle to the new value before the call. Old
> - * pm_idle value will not be used by any CPU after the return of this function.
> + * Caller must have changed idle routine to the new value before the call. Old
> + * value will not be used by any CPU after the return of this function.
>   */
>  void cpu_idle_wait(void)
>  {
>         smp_mb();
> -       /* kick all the CPUs so that they exit out of pm_idle */
> +       /* kick all the CPUs so that they exit out of idle loop */
>         smp_call_function(do_nothing, NULL, 1);
>  }
>  EXPORT_SYMBOL_GPL(cpu_idle_wait);
> @@ -518,15 +511,59 @@ static void c1e_idle(void)
>                 default_idle();
>  }
>  
> +static void (*local_idle)(void);
> +DEFINE_PER_CPU(struct cpuidle_device, idle_devices);
> +
> +struct cpuidle_driver cpuidle_default_driver = {
> +       .name =         "cpuidle_default",
> +};
> +
> +static int local_idle_loop(struct cpuidle_device *dev, struct cpuidle_state *st)
> +{
> +       ktime_t t1, t2;
> +       s64 diff;
> +       int ret;
> +
> +       t1 = ktime_get();
> +       local_idle();
> +       t2 = ktime_get();
> +
> +       diff = ktime_to_us(ktime_sub(t2, t1));
> +       if (diff > INT_MAX)
> +               diff = INT_MAX;
> +       ret = (int) diff;
> +
> +       return ret;
> +}
> +
> +static int setup_cpuidle_simple(void)
> +{
> +       struct cpuidle_device *dev;
> +       int cpu;
> +
> +       if (!cpuidle_curr_driver)
> +               cpuidle_register_driver(&cpuidle_default_driver);
> +
> +       for_each_online_cpu(cpu) {
> +               dev = &per_cpu(idle_devices, cpu);
> +               dev->cpu = cpu;
> +               dev->states[0].enter = local_idle_loop;
> +               dev->state_count = 1;
> +               cpuidle_register_device(dev);
> +       }
> +       return 0;
> +}
> +device_initcall(setup_cpuidle_simple);
> +
>  void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
>  {
>  #ifdef CONFIG_SMP
> -       if (pm_idle == poll_idle && smp_num_siblings > 1) {
> +       if (local_idle == poll_idle && smp_num_siblings > 1) {
>                 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
>                         " performance may degrade.\n");
>         }
>  #endif
> -       if (pm_idle)
> +       if (local_idle)
>                 return;
>  
>         if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
> @@ -534,18 +571,20 @@ void __cpuinit select_idle_routine(const
>                  * One CPU supports mwait => All CPUs supports mwait
>                  */
>                 printk(KERN_INFO "using mwait in idle threads.\n");
> -               pm_idle = mwait_idle;
> +               local_idle = mwait_idle;
>         } else if (check_c1e_idle(c)) {
>                 printk(KERN_INFO "using C1E aware idle routine\n");
> -               pm_idle = c1e_idle;
> +               local_idle = c1e_idle;
>         } else
> -               pm_idle = default_idle;
> +               local_idle = default_idle;
> +
> +       return;
>  }
>  
>  void __init init_c1e_mask(void)
>  {
>         /* If we're using c1e_idle, we need to allocate c1e_mask. */
> -       if (pm_idle == c1e_idle)
> +       if (local_idle == c1e_idle)
>                 zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
>  }
>  
> @@ -556,7 +595,7 @@ static int __init idle_setup(char *str)
>  
>         if (!strcmp(str, "poll")) {
>                 printk("using polling idle threads.\n");
> -               pm_idle = poll_idle;
> +               local_idle = poll_idle;
>         } else if (!strcmp(str, "mwait"))
>                 force_mwait = 1;
>         else if (!strcmp(str, "halt")) {
> @@ -567,7 +606,7 @@ static int __init idle_setup(char *str)
>                  * To continue to load the CPU idle driver, don't touch
>                  * the boot_option_idle_override.
>                  */
> -               pm_idle = default_idle;
> +               local_idle = default_idle;
>                 idle_halt = 1;
>                 return 0;
>         } else if (!strcmp(str, "nomwait")) {


What guarantees that the cpuidle bits actually select this
cpuidle_default driver when you do idle=poll?

Also, cpuidle already has a poll loop in it, why duplicate that?
Arun Bharadwaj Oct. 7, 2009, 4:45 p.m. UTC | #2
* Peter Zijlstra <a.p.zijlstra@chello.nl> [2009-10-07 16:45:50]:

> On Tue, 2009-10-06 at 21:01 +0530, Arun R Bharadwaj wrote:
> > +++ linux.trees.git/arch/x86/kernel/process.c
> > @@ -9,6 +9,7 @@
> >  #include <linux/pm.h>
> >  #include <linux/clockchips.h>
> >  #include <linux/random.h>
> > +#include <linux/cpuidle.h>
> >  #include <trace/events/power.h>
> >  #include <asm/system.h>
> >  #include <asm/apic.h>
> > @@ -244,12 +245,6 @@ int sys_vfork(struct pt_regs *regs)
> >  unsigned long boot_option_idle_override = 0;
> >  EXPORT_SYMBOL(boot_option_idle_override);
> >  
> > -/*
> > - * Powermanagement idle function, if any..
> > - */
> > -void (*pm_idle)(void);
> > -EXPORT_SYMBOL(pm_idle);
> > -
> >  #ifdef CONFIG_X86_32
> >  /*
> >   * This halt magic was a workaround for ancient floppy DMA
> > @@ -329,17 +324,15 @@ static void do_nothing(void *unused)
> >  }
> >  
> >  /*
> > - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
> > - * pm_idle and update to new pm_idle value. Required while changing pm_idle
> > - * handler on SMP systems.
> > + * cpu_idle_wait - Required while changing idle routine handler on SMP systems.
> >   *
> > - * Caller must have changed pm_idle to the new value before the call. Old
> > - * pm_idle value will not be used by any CPU after the return of this function.
> > + * Caller must have changed idle routine to the new value before the call. Old
> > + * value will not be used by any CPU after the return of this function.
> >   */
> >  void cpu_idle_wait(void)
> >  {
> >         smp_mb();
> > -       /* kick all the CPUs so that they exit out of pm_idle */
> > +       /* kick all the CPUs so that they exit out of idle loop */
> >         smp_call_function(do_nothing, NULL, 1);
> >  }
> >  EXPORT_SYMBOL_GPL(cpu_idle_wait);
> > @@ -518,15 +511,59 @@ static void c1e_idle(void)
> >                 default_idle();
> >  }
> >  
> > +static void (*local_idle)(void);
> > +DEFINE_PER_CPU(struct cpuidle_device, idle_devices);
> > +
> > +struct cpuidle_driver cpuidle_default_driver = {
> > +       .name =         "cpuidle_default",
> > +};
> > +
> > +static int local_idle_loop(struct cpuidle_device *dev, struct cpuidle_state *st)
> > +{
> > +       ktime_t t1, t2;
> > +       s64 diff;
> > +       int ret;
> > +
> > +       t1 = ktime_get();
> > +       local_idle();
> > +       t2 = ktime_get();
> > +
> > +       diff = ktime_to_us(ktime_sub(t2, t1));
> > +       if (diff > INT_MAX)
> > +               diff = INT_MAX;
> > +       ret = (int) diff;
> > +
> > +       return ret;
> > +}
> > +
> > +static int setup_cpuidle_simple(void)
> > +{
> > +       struct cpuidle_device *dev;
> > +       int cpu;
> > +
> > +       if (!cpuidle_curr_driver)
> > +               cpuidle_register_driver(&cpuidle_default_driver);
> > +
> > +       for_each_online_cpu(cpu) {
> > +               dev = &per_cpu(idle_devices, cpu);
> > +               dev->cpu = cpu;
> > +               dev->states[0].enter = local_idle_loop;
> > +               dev->state_count = 1;
> > +               cpuidle_register_device(dev);
> > +       }
> > +       return 0;
> > +}
> > +device_initcall(setup_cpuidle_simple);
> > +
> >  void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
> >  {
> >  #ifdef CONFIG_SMP
> > -       if (pm_idle == poll_idle && smp_num_siblings > 1) {
> > +       if (local_idle == poll_idle && smp_num_siblings > 1) {
> >                 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
> >                         " performance may degrade.\n");
> >         }
> >  #endif
> > -       if (pm_idle)
> > +       if (local_idle)
> >                 return;
> >  
> >         if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
> > @@ -534,18 +571,20 @@ void __cpuinit select_idle_routine(const
> >                  * One CPU supports mwait => All CPUs supports mwait
> >                  */
> >                 printk(KERN_INFO "using mwait in idle threads.\n");
> > -               pm_idle = mwait_idle;
> > +               local_idle = mwait_idle;
> >         } else if (check_c1e_idle(c)) {
> >                 printk(KERN_INFO "using C1E aware idle routine\n");
> > -               pm_idle = c1e_idle;
> > +               local_idle = c1e_idle;
> >         } else
> > -               pm_idle = default_idle;
> > +               local_idle = default_idle;
> > +
> > +       return;
> >  }
> >  
> >  void __init init_c1e_mask(void)
> >  {
> >         /* If we're using c1e_idle, we need to allocate c1e_mask. */
> > -       if (pm_idle == c1e_idle)
> > +       if (local_idle == c1e_idle)
> >                 zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
> >  }
> >  
> > @@ -556,7 +595,7 @@ static int __init idle_setup(char *str)
> >  
> >         if (!strcmp(str, "poll")) {
> >                 printk("using polling idle threads.\n");
> > -               pm_idle = poll_idle;
> > +               local_idle = poll_idle;
> >         } else if (!strcmp(str, "mwait"))
> >                 force_mwait = 1;
> >         else if (!strcmp(str, "halt")) {
> > @@ -567,7 +606,7 @@ static int __init idle_setup(char *str)
> >                  * To continue to load the CPU idle driver, don't touch
> >                  * the boot_option_idle_override.
> >                  */
> > -               pm_idle = default_idle;
> > +               local_idle = default_idle;
> >                 idle_halt = 1;
> >                 return 0;
> >         } else if (!strcmp(str, "nomwait")) {
> 
> 
> What guarantees that the cpuidle bits actually select this
> cpuidle_default driver when you do idle=poll?
> 

When we do an idle=poll, it sets boot_option_idle_override = 1, which
is checked during cpuidle_register_device in acpi/processor_idle.c.

So cpuidle devices are not even registered if this option is set.

But in acpi/processor_core.c, where cpuidle_register_driver happens,
this check is not made currently. So, I guess this check must be added
before we register the acpi_idle driver.

> Also, cpuidle already has a poll loop in it, why duplicate that?
>

If the arch doesn't have a poll loop of its own, it can use the
one provided by cpuidle. I have just retained this from the earlier
implementation.

--arun
Arun Bharadwaj Oct. 8, 2009, 5:54 a.m. UTC | #3
* Peter Zijlstra <a.p.zijlstra@chello.nl> [2009-10-07 16:45:50]:

> On Tue, 2009-10-06 at 21:01 +0530, Arun R Bharadwaj wrote:
> > +++ linux.trees.git/arch/x86/kernel/process.c
> > @@ -9,6 +9,7 @@
> >  #include <linux/pm.h>
> >  #include <linux/clockchips.h>
> >  #include <linux/random.h>
> > +#include <linux/cpuidle.h>
> >  #include <trace/events/power.h>
> >  #include <asm/system.h>
> >  #include <asm/apic.h>
> > @@ -244,12 +245,6 @@ int sys_vfork(struct pt_regs *regs)
> >  unsigned long boot_option_idle_override = 0;
> >  EXPORT_SYMBOL(boot_option_idle_override);
> >  
> > -/*
> > - * Powermanagement idle function, if any..
> > - */
> > -void (*pm_idle)(void);
> > -EXPORT_SYMBOL(pm_idle);
> > -
> >  #ifdef CONFIG_X86_32
> >  /*
> >   * This halt magic was a workaround for ancient floppy DMA
> > @@ -329,17 +324,15 @@ static void do_nothing(void *unused)
> >  }
> >  
> >  /*
> > - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
> > - * pm_idle and update to new pm_idle value. Required while changing pm_idle
> > - * handler on SMP systems.
> > + * cpu_idle_wait - Required while changing idle routine handler on SMP systems.
> >   *
> > - * Caller must have changed pm_idle to the new value before the call. Old
> > - * pm_idle value will not be used by any CPU after the return of this function.
> > + * Caller must have changed idle routine to the new value before the call. Old
> > + * value will not be used by any CPU after the return of this function.
> >   */
> >  void cpu_idle_wait(void)
> >  {
> >         smp_mb();
> > -       /* kick all the CPUs so that they exit out of pm_idle */
> > +       /* kick all the CPUs so that they exit out of idle loop */
> >         smp_call_function(do_nothing, NULL, 1);
> >  }
> >  EXPORT_SYMBOL_GPL(cpu_idle_wait);
> > @@ -518,15 +511,59 @@ static void c1e_idle(void)
> >                 default_idle();
> >  }
> >  
> > +static void (*local_idle)(void);
> > +DEFINE_PER_CPU(struct cpuidle_device, idle_devices);
> > +
> > +struct cpuidle_driver cpuidle_default_driver = {
> > +       .name =         "cpuidle_default",
> > +};
> > +
> > +static int local_idle_loop(struct cpuidle_device *dev, struct cpuidle_state *st)
> > +{
> > +       ktime_t t1, t2;
> > +       s64 diff;
> > +       int ret;
> > +
> > +       t1 = ktime_get();
> > +       local_idle();
> > +       t2 = ktime_get();
> > +
> > +       diff = ktime_to_us(ktime_sub(t2, t1));
> > +       if (diff > INT_MAX)
> > +               diff = INT_MAX;
> > +       ret = (int) diff;
> > +
> > +       return ret;
> > +}
> > +
> > +static int setup_cpuidle_simple(void)
> > +{
> > +       struct cpuidle_device *dev;
> > +       int cpu;
> > +
> > +       if (!cpuidle_curr_driver)
> > +               cpuidle_register_driver(&cpuidle_default_driver);
> > +
> > +       for_each_online_cpu(cpu) {
> > +               dev = &per_cpu(idle_devices, cpu);
> > +               dev->cpu = cpu;
> > +               dev->states[0].enter = local_idle_loop;
> > +               dev->state_count = 1;
> > +               cpuidle_register_device(dev);
> > +       }
> > +       return 0;
> > +}
> > +device_initcall(setup_cpuidle_simple);
> > +
> >  void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
> >  {
> >  #ifdef CONFIG_SMP
> > -       if (pm_idle == poll_idle && smp_num_siblings > 1) {
> > +       if (local_idle == poll_idle && smp_num_siblings > 1) {
> >                 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
> >                         " performance may degrade.\n");
> >         }
> >  #endif
> > -       if (pm_idle)
> > +       if (local_idle)
> >                 return;
> >  
> >         if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
> > @@ -534,18 +571,20 @@ void __cpuinit select_idle_routine(const
> >                  * One CPU supports mwait => All CPUs supports mwait
> >                  */
> >                 printk(KERN_INFO "using mwait in idle threads.\n");
> > -               pm_idle = mwait_idle;
> > +               local_idle = mwait_idle;
> >         } else if (check_c1e_idle(c)) {
> >                 printk(KERN_INFO "using C1E aware idle routine\n");
> > -               pm_idle = c1e_idle;
> > +               local_idle = c1e_idle;
> >         } else
> > -               pm_idle = default_idle;
> > +               local_idle = default_idle;
> > +
> > +       return;
> >  }
> >  
> >  void __init init_c1e_mask(void)
> >  {
> >         /* If we're using c1e_idle, we need to allocate c1e_mask. */
> > -       if (pm_idle == c1e_idle)
> > +       if (local_idle == c1e_idle)
> >                 zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
> >  }
> >  
> > @@ -556,7 +595,7 @@ static int __init idle_setup(char *str)
> >  
> >         if (!strcmp(str, "poll")) {
> >                 printk("using polling idle threads.\n");
> > -               pm_idle = poll_idle;
> > +               local_idle = poll_idle;
> >         } else if (!strcmp(str, "mwait"))
> >                 force_mwait = 1;
> >         else if (!strcmp(str, "halt")) {
> > @@ -567,7 +606,7 @@ static int __init idle_setup(char *str)
> >                  * To continue to load the CPU idle driver, don't touch
> >                  * the boot_option_idle_override.
> >                  */
> > -               pm_idle = default_idle;
> > +               local_idle = default_idle;
> >                 idle_halt = 1;
> >                 return 0;
> >         } else if (!strcmp(str, "nomwait")) {
> 
> 
> What guarantees that the cpuidle bits actually select this
> cpuidle_default driver when you do idle=poll?
> 
> Also, cpuidle already has a poll loop in it, why duplicate that?
> 

Yes, now I see it. I'll get rid of the redundant poll_idle definition.
diff mbox

Patch

Index: linux.trees.git/arch/x86/kernel/process.c
===================================================================
--- linux.trees.git.orig/arch/x86/kernel/process.c
+++ linux.trees.git/arch/x86/kernel/process.c
@@ -9,6 +9,7 @@ 
 #include <linux/pm.h>
 #include <linux/clockchips.h>
 #include <linux/random.h>
+#include <linux/cpuidle.h>
 #include <trace/events/power.h>
 #include <asm/system.h>
 #include <asm/apic.h>
@@ -244,12 +245,6 @@  int sys_vfork(struct pt_regs *regs)
 unsigned long boot_option_idle_override = 0;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-
 #ifdef CONFIG_X86_32
 /*
  * This halt magic was a workaround for ancient floppy DMA
@@ -329,17 +324,15 @@  static void do_nothing(void *unused)
 }
 
 /*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
+ * cpu_idle_wait - Required while changing idle routine handler on SMP systems.
  *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
+ * Caller must have changed idle routine to the new value before the call. Old
+ * value will not be used by any CPU after the return of this function.
  */
 void cpu_idle_wait(void)
 {
 	smp_mb();
-	/* kick all the CPUs so that they exit out of pm_idle */
+	/* kick all the CPUs so that they exit out of idle loop */
 	smp_call_function(do_nothing, NULL, 1);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
@@ -518,15 +511,59 @@  static void c1e_idle(void)
 		default_idle();
 }
 
+static void (*local_idle)(void);
+DEFINE_PER_CPU(struct cpuidle_device, idle_devices);
+
+struct cpuidle_driver cpuidle_default_driver = {
+	.name =         "cpuidle_default",
+};
+
+static int local_idle_loop(struct cpuidle_device *dev, struct cpuidle_state *st)
+{
+	ktime_t t1, t2;
+	s64 diff;
+	int ret;
+
+	t1 = ktime_get();
+	local_idle();
+	t2 = ktime_get();
+
+	diff = ktime_to_us(ktime_sub(t2, t1));
+	if (diff > INT_MAX)
+		diff = INT_MAX;
+	ret = (int) diff;
+
+	return ret;
+}
+
+static int setup_cpuidle_simple(void)
+{
+	struct cpuidle_device *dev;
+	int cpu;
+
+	if (!cpuidle_curr_driver)
+		cpuidle_register_driver(&cpuidle_default_driver);
+
+	for_each_online_cpu(cpu) {
+		dev = &per_cpu(idle_devices, cpu);
+		dev->cpu = cpu;
+		dev->states[0].enter = local_idle_loop;
+		dev->state_count = 1;
+		cpuidle_register_device(dev);
+	}
+	return 0;
+}
+device_initcall(setup_cpuidle_simple);
+
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-	if (pm_idle == poll_idle && smp_num_siblings > 1) {
+	if (local_idle == poll_idle && smp_num_siblings > 1) {
 		printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
 			" performance may degrade.\n");
 	}
 #endif
-	if (pm_idle)
+	if (local_idle)
 		return;
 
 	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
@@ -534,18 +571,20 @@  void __cpuinit select_idle_routine(const
 		 * One CPU supports mwait => All CPUs supports mwait
 		 */
 		printk(KERN_INFO "using mwait in idle threads.\n");
-		pm_idle = mwait_idle;
+		local_idle = mwait_idle;
 	} else if (check_c1e_idle(c)) {
 		printk(KERN_INFO "using C1E aware idle routine\n");
-		pm_idle = c1e_idle;
+		local_idle = c1e_idle;
 	} else
-		pm_idle = default_idle;
+		local_idle = default_idle;
+
+	return;
 }
 
 void __init init_c1e_mask(void)
 {
 	/* If we're using c1e_idle, we need to allocate c1e_mask. */
-	if (pm_idle == c1e_idle)
+	if (local_idle == c1e_idle)
 		zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
 }
 
@@ -556,7 +595,7 @@  static int __init idle_setup(char *str)
 
 	if (!strcmp(str, "poll")) {
 		printk("using polling idle threads.\n");
-		pm_idle = poll_idle;
+		local_idle = poll_idle;
 	} else if (!strcmp(str, "mwait"))
 		force_mwait = 1;
 	else if (!strcmp(str, "halt")) {
@@ -567,7 +606,7 @@  static int __init idle_setup(char *str)
 		 * To continue to load the CPU idle driver, don't touch
 		 * the boot_option_idle_override.
 		 */
-		pm_idle = default_idle;
+		local_idle = default_idle;
 		idle_halt = 1;
 		return 0;
 	} else if (!strcmp(str, "nomwait")) {
Index: linux.trees.git/arch/x86/kernel/process_32.c
===================================================================
--- linux.trees.git.orig/arch/x86/kernel/process_32.c
+++ linux.trees.git/arch/x86/kernel/process_32.c
@@ -40,6 +40,7 @@ 
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/kdebug.h>
+#include <linux/cpuidle.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -113,7 +114,7 @@  void cpu_idle(void)
 			local_irq_disable();
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
-			pm_idle();
+			cpuidle_idle_call();
 			start_critical_timings();
 		}
 		tick_nohz_restart_sched_tick();
Index: linux.trees.git/arch/x86/kernel/process_64.c
===================================================================
--- linux.trees.git.orig/arch/x86/kernel/process_64.c
+++ linux.trees.git/arch/x86/kernel/process_64.c
@@ -39,6 +39,7 @@ 
 #include <linux/io.h>
 #include <linux/ftrace.h>
 #include <linux/dmi.h>
+#include <linux/cpuidle.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -142,7 +143,7 @@  void cpu_idle(void)
 			enter_idle();
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
-			pm_idle();
+			cpuidle_idle_call();
 			start_critical_timings();
 			/* In many cases the interrupt that ended idle
 			   has already called exit_idle. But some idle
Index: linux.trees.git/arch/x86/kernel/apm_32.c
===================================================================
--- linux.trees.git.orig/arch/x86/kernel/apm_32.c
+++ linux.trees.git/arch/x86/kernel/apm_32.c
@@ -2257,6 +2257,38 @@  static struct dmi_system_id __initdata a
 	{ }
 };
 
+DEFINE_PER_CPU(struct cpuidle_device, apm_idle_devices);
+
+struct cpuidle_driver cpuidle_apm_driver = {
+	.name =         "cpuidle_apm",
+};
+
+void __cpuinit setup_cpuidle_apm(void)
+{
+	struct cpuidle_device *dev;
+
+	if (!cpuidle_curr_driver)
+		cpuidle_register_driver(&cpuidle_apm_driver);
+
+	dev = &per_cpu(apm_idle_devices, smp_processor_id());
+	dev->cpu = smp_processor_id();
+	dev->states[0].enter = apm_cpu_idle;
+	dev->state_count = 1;
+	cpuidle_register_device(dev);
+}
+
+void exit_cpuidle_apm(void)
+{
+	struct cpuidle_device *dev;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		dev = &per_cpu(apm_idle_devices, cpu);
+		cpuidle_unregister_device(dev);
+	}
+}
+
+
 /*
  * Just start the APM thread. We do NOT want to do APM BIOS
  * calls from anything but the APM thread, if for no other reason
@@ -2394,8 +2426,7 @@  static int __init apm_init(void)
 	if (HZ != 100)
 		idle_period = (idle_period * HZ) / 100;
 	if (idle_threshold < 100) {
-		original_pm_idle = pm_idle;
-		pm_idle  = apm_cpu_idle;
+		setup_cpuidle_apm();
 		set_pm_idle = 1;
 	}
 
@@ -2407,7 +2438,7 @@  static void __exit apm_exit(void)
 	int error;
 
 	if (set_pm_idle) {
-		pm_idle = original_pm_idle;
+		exit_cpuidle_apm();
 		/*
 		 * We are about to unload the current idle thread pm callback
 		 * (pm_idle), Wait for all processors to update cached/local
Index: linux.trees.git/arch/x86/xen/setup.c
===================================================================
--- linux.trees.git.orig/arch/x86/xen/setup.c
+++ linux.trees.git/arch/x86/xen/setup.c
@@ -8,6 +8,7 @@ 
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/pm.h>
+#include <linux/cpuidle.h>
 
 #include <asm/elf.h>
 #include <asm/vdso.h>
@@ -151,6 +152,25 @@  void __cpuinit xen_enable_syscall(void)
 #endif /* CONFIG_X86_64 */
 }
 
+DEFINE_PER_CPU(struct cpuidle_device, xen_idle_devices);
+struct cpuidle_driver cpuidle_xen_driver = {
+	.name =         "cpuidle_xen",
+};
+
+void __cpuinit setup_cpuidle_xen(void)
+{
+	struct cpuidle_device *dev;
+
+	if (!cpuidle_curr_driver)
+		cpuidle_register_driver(&cpuidle_xen_driver);
+
+	dev = &per_cpu(xen_idle_devices, smp_processor_id());
+	dev->cpu = smp_processor_id();
+	dev->states[0].enter = xen_idle;
+	dev->state_count = 1;
+	cpuidle_register_device(dev);
+}
+
 void __init xen_arch_setup(void)
 {
 	struct physdev_set_iopl set_iopl;
@@ -186,7 +206,7 @@  void __init xen_arch_setup(void)
 	       MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
 	       COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
 
-	pm_idle = xen_idle;
+	setup_cpuidle_xen();
 
 	paravirt_disable_iospace();