diff mbox

[linux-next,v3,1/2] irq: Add CPU mask affinity hint

Message ID 20100430202343.4591.66240.stgit@ppwaskie-hc2.jf.intel.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Waskiewicz Jr, Peter P April 30, 2010, 8:23 p.m. UTC
This patch adds a cpumask affinity hint to the irq_desc
structure, along with a registration function and a read-only
proc entry for each interrupt.

This affinity_hint handle for each interrupt can be used by
underlying drivers that need a better mechanism to control
interrupt affinity.  The underlying driver can register a
cpumask for the interrupt, which will allow the driver to
provide the CPU mask for the interrupt to anything that
requests it.  The intent is to extend the userspace daemon,
irqbalance, to help hint to it a preferred CPU mask to balance
the interrupt into.

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
---

 include/linux/interrupt.h |    7 +++++++
 include/linux/irq.h       |    1 +
 kernel/irq/manage.c       |   19 +++++++++++++++++++
 kernel/irq/proc.c         |   42 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 69 insertions(+), 0 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Thomas Gleixner April 30, 2010, 8:24 p.m. UTC | #1
Peter,

On Fri, 30 Apr 2010, Peter P Waskiewicz Jr wrote:
>  
> +extern int irq_register_affinity_hint(unsigned int irq,
> +                                      const struct cpumask *m);

One last nitpick. 

Can you please rename to irq_set_affinity_hint() ?

Otherwise, I'm happy with the outcome. Thanks for your patience!

Thanks,

	tglx
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Waskiewicz Jr, Peter P April 30, 2010, 8:30 p.m. UTC | #2
On Fri, 30 Apr 2010, Thomas Gleixner wrote:

> Peter,
>
> On Fri, 30 Apr 2010, Peter P Waskiewicz Jr wrote:
>>
>> +extern int irq_register_affinity_hint(unsigned int irq,
>> +                                      const struct cpumask *m);
>
> One last nitpick.
>
> Can you please rename to irq_set_affinity_hint() ?

Not a problem.  It's a better name anyways now that we've dropped the 
callback function.  Patch update incoming.

>
> Otherwise, I'm happy with the outcome. Thanks for your patience!

Thanks for your help getting this pulled together.  This is a big step for 
our performance tuning goals.

Cheers,
-PJ
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Hutchings April 30, 2010, 9:02 p.m. UTC | #3
On Fri, 2010-04-30 at 13:23 -0700, Peter P Waskiewicz Jr wrote:
> This patch adds a cpumask affinity hint to the irq_desc
> structure, along with a registration function and a read-only
> proc entry for each interrupt.
> 
> This affinity_hint handle for each interrupt can be used by
> underlying drivers that need a better mechanism to control
> interrupt affinity.  The underlying driver can register a
> cpumask for the interrupt, which will allow the driver to
> provide the CPU mask for the interrupt to anything that
> requests it.  The intent is to extend the userspace daemon,
> irqbalance, to help hint to it a preferred CPU mask to balance
> the interrupt into.
> 
> Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
[...]
> diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
> index 704e488..1354fc9 100644
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -138,6 +138,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
>  	return 0;
>  }
>  
> +int irq_register_affinity_hint(unsigned int irq, const struct cpumask *m)
> +{
> +	struct irq_desc *desc = irq_to_desc(irq);
> +	unsigned long flags;
> +
> +	if (!desc)
> +		return -EINVAL;

Is it possible for irq_to_desc(irq) to be NULL?  This function already
assumes that the caller 'owns' the IRQ.

> +	raw_spin_lock_irqsave(&desc->lock, flags);
> +	desc->affinity_hint = m;
> +	raw_spin_unlock_irqrestore(&desc->lock, flags);
> +
> +	return 0;
> +}
[...]
> diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
> index 7a6eb04..1aa7939 100644
> --- a/kernel/irq/proc.c
> +++ b/kernel/irq/proc.c
> @@ -32,6 +32,32 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
>  	return 0;
>  }
>  
> +static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
> +{
> +	struct irq_desc *desc = irq_to_desc((long)m->private);
> +	unsigned long flags;
> +	cpumask_var_t mask;
> +	int ret = -EINVAL;
> +
> +	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
> +		return -ENOMEM;
> +
> +	raw_spin_lock_irqsave(&desc->lock, flags);
> +	if (desc->affinity_hint) {
> +		cpumask_copy(mask, desc->affinity_hint);
> +		ret = 0;
> +	}
> +	raw_spin_unlock_irqrestore(&desc->lock, flags);
> +
> +	if (!ret) {
> +		seq_cpumask(m, mask);
> +		seq_putc(m, '\n');
> +	}
> +	free_cpumask_var(mask);
> +
> +	return ret;
> +}
[...]

I don't think this should be returning -EINVAL if the affinity hint is
missing.  That means 'invalid argument', but there is nothing invalid
about trying to read() the corresponding file.  The file should simply
be empty if there is no hint.  (Actually it might be better if it didn't
appear at all, but that would be a pain to implement.)

Ben.
Thomas Gleixner April 30, 2010, 9:17 p.m. UTC | #4
On Fri, 30 Apr 2010, Ben Hutchings wrote:
> On Fri, 2010-04-30 at 13:23 -0700, Peter P Waskiewicz Jr wrote:
> > +int irq_register_affinity_hint(unsigned int irq, const struct cpumask *m)
> > +{
> > +	struct irq_desc *desc = irq_to_desc(irq);
> > +	unsigned long flags;
> > +
> > +	if (!desc)
> > +		return -EINVAL;
> 
> Is it possible for irq_to_desc(irq) to be NULL?  This function already
> assumes that the caller 'owns' the IRQ.

Oh come on. Driver writers get everything wrong and not checking on an
invalid irq number is better than crashing :)

> > +static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
> > +{
> > +	struct irq_desc *desc = irq_to_desc((long)m->private);
> > +	unsigned long flags;
> > +	cpumask_var_t mask;
> > +	int ret = -EINVAL;
> 
> I don't think this should be returning -EINVAL if the affinity hint is
> missing.  That means 'invalid argument', but there is nothing invalid
> about trying to read() the corresponding file.  The file should simply
> be empty if there is no hint.  (Actually it might be better if it didn't
> appear at all, but that would be a pain to implement.)

I agree that -EINVAL is not really a good match.

How about just returning CPU_MASK_ALL if desc->affinity_hint is not
set ?

Thanks,

	tglx
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Waskiewicz Jr, Peter P April 30, 2010, 9:18 p.m. UTC | #5
On Fri, 30 Apr 2010, Thomas Gleixner wrote:

> On Fri, 30 Apr 2010, Ben Hutchings wrote:
>> On Fri, 2010-04-30 at 13:23 -0700, Peter P Waskiewicz Jr wrote:
>>> +int irq_register_affinity_hint(unsigned int irq, const struct cpumask *m)
>>> +{
>>> +	struct irq_desc *desc = irq_to_desc(irq);
>>> +	unsigned long flags;
>>> +
>>> +	if (!desc)
>>> +		return -EINVAL;
>>
>> Is it possible for irq_to_desc(irq) to be NULL?  This function already
>> assumes that the caller 'owns' the IRQ.
>
> Oh come on. Driver writers get everything wrong and not checking on an
> invalid irq number is better than crashing :)
>
>>> +static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
>>> +{
>>> +	struct irq_desc *desc = irq_to_desc((long)m->private);
>>> +	unsigned long flags;
>>> +	cpumask_var_t mask;
>>> +	int ret = -EINVAL;
>>
>> I don't think this should be returning -EINVAL if the affinity hint is
>> missing.  That means 'invalid argument', but there is nothing invalid
>> about trying to read() the corresponding file.  The file should simply
>> be empty if there is no hint.  (Actually it might be better if it didn't
>> appear at all, but that would be a pain to implement.)
>
> I agree that -EINVAL is not really a good match.
>
> How about just returning CPU_MASK_ALL if desc->affinity_hint is not
> set ?

That seems reasonable to me.

cheers,
-PJ
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 75f3f00..4d1df9b 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -209,6 +209,8 @@  extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 extern int irq_can_set_affinity(unsigned int irq);
 extern int irq_select_affinity(unsigned int irq);
 
+extern int irq_register_affinity_hint(unsigned int irq,
+                                      const struct cpumask *m);
 #else /* CONFIG_SMP */
 
 static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
@@ -223,6 +225,11 @@  static inline int irq_can_set_affinity(unsigned int irq)
 
 static inline int irq_select_affinity(unsigned int irq)  { return 0; }
 
+static inline int irq_register_affinity_hint(unsigned int irq,
+                                             const struct cpumask *m)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */
 
 #ifdef CONFIG_GENERIC_HARDIRQS
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 707ab12..83b16d7 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -206,6 +206,7 @@  struct irq_desc {
 	struct proc_dir_entry	*dir;
 #endif
 	const char		*name;
+	struct cpumask		*affinity_hint;
 } ____cacheline_internodealigned_in_smp;
 
 extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 704e488..1354fc9 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -138,6 +138,22 @@  int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	return 0;
 }
 
+int irq_register_affinity_hint(unsigned int irq, const struct cpumask *m)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	unsigned long flags;
+
+	if (!desc)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	desc->affinity_hint = m;
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(irq_register_affinity_hint);
+
 #ifndef CONFIG_AUTO_IRQ_AFFINITY
 /*
  * Generic version of the affinity autoselector.
@@ -916,6 +932,9 @@  static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 			desc->chip->disable(irq);
 	}
 
+	/* make sure affinity_hint is cleaned up */
+	desc->affinity_hint = NULL;
+
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 	unregister_handler_proc(irq, action);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 7a6eb04..1aa7939 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -32,6 +32,32 @@  static int irq_affinity_proc_show(struct seq_file *m, void *v)
 	return 0;
 }
 
+static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
+{
+	struct irq_desc *desc = irq_to_desc((long)m->private);
+	unsigned long flags;
+	cpumask_var_t mask;
+	int ret = -EINVAL;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	if (desc->affinity_hint) {
+		cpumask_copy(mask, desc->affinity_hint);
+		ret = 0;
+	}
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+	if (!ret) {
+		seq_cpumask(m, mask);
+		seq_putc(m, '\n');
+	}
+	free_cpumask_var(mask);
+
+	return ret;
+}
+
 #ifndef is_affinity_mask_valid
 #define is_affinity_mask_valid(val) 1
 #endif
@@ -84,6 +110,11 @@  static int irq_affinity_proc_open(struct inode *inode, struct file *file)
 	return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
 }
 
+static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data);
+}
+
 static const struct file_operations irq_affinity_proc_fops = {
 	.open		= irq_affinity_proc_open,
 	.read		= seq_read,
@@ -92,6 +123,13 @@  static const struct file_operations irq_affinity_proc_fops = {
 	.write		= irq_affinity_proc_write,
 };
 
+static const struct file_operations irq_affinity_hint_proc_fops = {
+	.open		= irq_affinity_hint_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static int default_affinity_show(struct seq_file *m, void *v)
 {
 	seq_cpumask(m, irq_default_affinity);
@@ -231,6 +269,10 @@  void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 	/* create /proc/irq/<irq>/smp_affinity */
 	proc_create_data("smp_affinity", 0600, desc->dir,
 			 &irq_affinity_proc_fops, (void *)(long)irq);
+
+	/* create /proc/irq/<irq>/affinity_hint */
+	proc_create_data("affinity_hint", 0400, desc->dir,
+			 &irq_affinity_hint_proc_fops, (void *)(long)irq);
 #endif
 
 	proc_create_data("spurious", 0444, desc->dir,