Message ID | 1274227491.2370.110.camel@w-sridhar.beaverton.ibm.com |
---|---|
State | Not Applicable, archived |
Delegated to: | David Miller |
Headers | show |
On Tue, May 18, 2010 at 05:04:51PM -0700, Sridhar Samudrala wrote: > Add a new kernel API to create a singlethread workqueue and attach it's > task to current task's cgroup and cpumask. > > Signed-off-by: Sridhar Samudrala <sri@us.ibm.com> Could someone familiar with workqueue code please comment on whether this patch is suitable for 2.6.35? It is needed to fix the case where vhost user might cause a kernel thread to consume more CPU than allowed by the cgroup. Should I merge it through the vhost tree? Ack for this? Thanks! > diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h > index 9466e86..6d6f301 100644 > --- a/include/linux/workqueue.h > +++ b/include/linux/workqueue.h > @@ -211,6 +211,7 @@ __create_workqueue_key(const char *name, int singlethread, > #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0) > #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0) > > +extern struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name); > extern void destroy_workqueue(struct workqueue_struct *wq); > > extern int queue_work(struct workqueue_struct *wq, struct work_struct *work); > diff --git a/kernel/workqueue.c b/kernel/workqueue.c > index 5bfb213..6ba226e 100644 > --- a/kernel/workqueue.c > +++ b/kernel/workqueue.c > @@ -35,6 +35,7 @@ > #include <linux/lockdep.h> > #define CREATE_TRACE_POINTS > #include <trace/events/workqueue.h> > +#include <linux/cgroup.h> > > /* > * The per-CPU workqueue (if single thread, we always use the first > @@ -193,6 +194,45 @@ static const struct cpumask *cpu_singlethread_map __read_mostly; > */ > static cpumask_var_t cpu_populated_map __read_mostly; > > +static struct task_struct *get_singlethread_wq_task(struct workqueue_struct *wq) > +{ > + return (per_cpu_ptr(wq->cpu_wq, singlethread_cpu))->thread; > +} > + > +/* Create a singlethread workqueue and attach it's task to the current task's > + * cgroup and set it's cpumask to the current task's cpumask. > + */ > +struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name) > +{ > + struct workqueue_struct *wq; > + struct task_struct *task; > + cpumask_var_t mask; > + > + wq = create_singlethread_workqueue(name); > + if (!wq) > + goto out; > + > + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) > + goto err; > + > + if (sched_getaffinity(current->pid, mask)) > + goto err; > + > + task = get_singlethread_wq_task(wq); > + if (sched_setaffinity(task->pid, mask)) > + goto err; > + > + if (cgroup_attach_task_current_cg(task)) > + goto err; > +out: > + return wq; > +err: > + destroy_workqueue(wq); > + wq = NULL; > + goto out; > +} > +EXPORT_SYMBOL_GPL(create_singlethread_workqueue_in_current_cg); > + > /* If it's single threaded, it isn't in the list of workqueues. */ > static inline int is_wq_single_threaded(struct workqueue_struct *wq) > { > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 05/27, Michael S. Tsirkin wrote: > > On Tue, May 18, 2010 at 05:04:51PM -0700, Sridhar Samudrala wrote: > > Add a new kernel API to create a singlethread workqueue and attach it's > > task to current task's cgroup and cpumask. > > > > Signed-off-by: Sridhar Samudrala <sri@us.ibm.com> > > Could someone familiar with workqueue code please comment on whether > this patch is suitable for 2.6.35? > > It is needed to fix the case where vhost user might cause a kernel > thread to consume more CPU than allowed by the cgroup. > Should I merge it through the vhost tree? > Ack for this? I don't understand the reasons for this patch, but this doesn't matter. I don't really see any need to change workqueue.c, > > +static struct task_struct *get_singlethread_wq_task(struct workqueue_struct *wq) > > +{ > > + return (per_cpu_ptr(wq->cpu_wq, singlethread_cpu))->thread; > > +} (Not sure this trivial static helper with the single caller makes sense, but see below) > > +/* Create a singlethread workqueue and attach it's task to the current task's > > + * cgroup and set it's cpumask to the current task's cpumask. > > + */ > > +struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name) > > +{ > > + struct workqueue_struct *wq; > > + struct task_struct *task; > > + cpumask_var_t mask; > > + > > + wq = create_singlethread_workqueue(name); > > + if (!wq) > > + goto out; > > + > > + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) > > + goto err; > > + > > + if (sched_getaffinity(current->pid, mask)) > > + goto err; > > + > > + task = get_singlethread_wq_task(wq); > > + if (sched_setaffinity(task->pid, mask)) > > + goto err; > > + > > + if (cgroup_attach_task_current_cg(task)) > > + goto err; > > +out: > > + return wq; > > +err: > > + destroy_workqueue(wq); > > + wq = NULL; > > + goto out; > > +} Instead, cgroup.c (or whoever needs this) can do struct move_struct { struct work_struct work; int ret; }; static void move_func(struct work_struct *work) { struct move_struct *move = container_of(...); if (cgroup_attach_task_current_cg(current)) ret = -EANY; } static struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name) { struct workqueue_struct *wq; struct move_struct move = { .work = __WORK_INITIALIZER(move_func); }; wq = create_singlethread_workqueue(name); if (!wq) return NULL; queue_work(&move.work); flush_work(&move.work); if (move.ret) { destroy_workqueue(wq); wq = NULL; } return wq; } Or. Just export wq_per_cpu() from workqueue.c (probably with a better name) and use it like the patch does. But, imho, create_singlethread_workqueue_in_current_cg() does not belong to workqueue.c. Oleg. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, May 27, 2010 at 02:44:48PM +0200, Oleg Nesterov wrote: > On 05/27, Michael S. Tsirkin wrote: > > > > On Tue, May 18, 2010 at 05:04:51PM -0700, Sridhar Samudrala wrote: > > > Add a new kernel API to create a singlethread workqueue and attach it's > > > task to current task's cgroup and cpumask. > > > > > > Signed-off-by: Sridhar Samudrala <sri@us.ibm.com> > > > > Could someone familiar with workqueue code please comment on whether > > this patch is suitable for 2.6.35? > > > > It is needed to fix the case where vhost user might cause a kernel > > thread to consume more CPU than allowed by the cgroup. > > Should I merge it through the vhost tree? > > Ack for this? > > I don't understand the reasons for this patch, but this doesn't matter. Depending on userspace application, driver can create a lot of work for a workqueue to handle. By making the workqueue thread belong in a cgroup, we make it possible to the CPU and other resources thus consumed.
On 05/27, Michael S. Tsirkin wrote: > > On Thu, May 27, 2010 at 02:44:48PM +0200, Oleg Nesterov wrote: > > On 05/27, Michael S. Tsirkin wrote: > > > > > > On Tue, May 18, 2010 at 05:04:51PM -0700, Sridhar Samudrala wrote: > > > > Add a new kernel API to create a singlethread workqueue and attach it's > > > > task to current task's cgroup and cpumask. > > > > > > > > Signed-off-by: Sridhar Samudrala <sri@us.ibm.com> > > > > > > Could someone familiar with workqueue code please comment on whether > > > this patch is suitable for 2.6.35? > > > > > > It is needed to fix the case where vhost user might cause a kernel > > > thread to consume more CPU than allowed by the cgroup. > > > Should I merge it through the vhost tree? > > > Ack for this? > > > > I don't understand the reasons for this patch, but this doesn't matter. > > Depending on userspace application, driver can create a lot of work > for a workqueue to handle. By making the workqueue thread > belong in a cgroup, we make it possible to the CPU and other > resources thus consumed. OK, I see, thanks for your explanation. in case I wasn't clear... I didn't mean I dislike this idea, only the the implementation of create_singlethread_workqueue_in_current_cg(), it doesn't belong to workqueue.c imho. Oleg. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hello, On 05/27/2010 03:12 PM, Michael S. Tsirkin wrote: >> I don't understand the reasons for this patch, but this doesn't matter. > > Depending on userspace application, driver can create a lot of work > for a workqueue to handle. By making the workqueue thread > belong in a cgroup, we make it possible to the CPU and other > resources thus consumed. Hmmm.... I don't really get it. The unit of scheduling in workqueue is a work. Unless you're gonna convert every driver to use this special kind of workqueue (and what happens when multiple tasks from different cgroups share the driver?), I can't see how this is gonna be useful. If you really wanna impose cgroup control on workqueue items, you'll have to do it per work item which might lead to the problem of priority inversion. Can you please describe what you're trying to do in more detail? Thank you.
On Thu, 2010-05-27 at 14:44 +0200, Oleg Nesterov wrote: > On 05/27, Michael S. Tsirkin wrote: > > > > On Tue, May 18, 2010 at 05:04:51PM -0700, Sridhar Samudrala wrote: > > > Add a new kernel API to create a singlethread workqueue and attach it's > > > task to current task's cgroup and cpumask. > > > > > > Signed-off-by: Sridhar Samudrala <sri@us.ibm.com> > > > > Could someone familiar with workqueue code please comment on whether > > this patch is suitable for 2.6.35? > > > > It is needed to fix the case where vhost user might cause a kernel > > thread to consume more CPU than allowed by the cgroup. > > Should I merge it through the vhost tree? > > Ack for this? > > I don't understand the reasons for this patch, but this doesn't matter. > > I don't really see any need to change workqueue.c, > > > > +static struct task_struct *get_singlethread_wq_task(struct workqueue_struct *wq) > > > +{ > > > + return (per_cpu_ptr(wq->cpu_wq, singlethread_cpu))->thread; > > > +} > > (Not sure this trivial static helper with the single caller makes sense, but > see below) > > > > +/* Create a singlethread workqueue and attach it's task to the current task's > > > + * cgroup and set it's cpumask to the current task's cpumask. > > > + */ > > > +struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name) > > > +{ > > > + struct workqueue_struct *wq; > > > + struct task_struct *task; > > > + cpumask_var_t mask; > > > + > > > + wq = create_singlethread_workqueue(name); > > > + if (!wq) > > > + goto out; > > > + > > > + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) > > > + goto err; > > > + > > > + if (sched_getaffinity(current->pid, mask)) > > > + goto err; > > > + > > > + task = get_singlethread_wq_task(wq); > > > + if (sched_setaffinity(task->pid, mask)) > > > + goto err; > > > + > > > + if (cgroup_attach_task_current_cg(task)) > > > + goto err; > > > +out: > > > + return wq; > > > +err: > > > + destroy_workqueue(wq); > > > + wq = NULL; > > > + goto out; > > > +} > > Instead, cgroup.c (or whoever needs this) can do > > struct move_struct { > struct work_struct work; > int ret; > }; > > static void move_func(struct work_struct *work) > { > struct move_struct *move = container_of(...); > > if (cgroup_attach_task_current_cg(current)) We are trying to attach the task associated with workqueue to the current task's cgroup. So what we need is cgroup_attach_task_current_cg(wq->task); However there is no interface currently that exports the task associated with a workqueue. It is hidden in cpu_workqueue_struct and is only accessible within workqueue.c. > ret = -EANY; > } > > static struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name) > { > struct workqueue_struct *wq; > struct move_struct move = { > .work = __WORK_INITIALIZER(move_func); > }; > > wq = create_singlethread_workqueue(name); > if (!wq) > return NULL; > > queue_work(&move.work); > flush_work(&move.work); > > if (move.ret) { > destroy_workqueue(wq); > wq = NULL; > } > > return wq; > } > > Or. Just export wq_per_cpu() from workqueue.c (probably with a better name) and > use it like the patch does. This requires that struct cpu_workqueue_struct and struct workqueue_struct are made externally visible by moving them to kernel/workqueue.h. Instead what about adding the simple helper get_singlethread_wq_task() in workqueue.c and exporting it. I can add create_singlethread_workqueue_in_current_cg() to cgroup.c using this helper routine. Thanks Sridhar -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, May 27, 2010 at 06:15:54PM +0200, Tejun Heo wrote: > Hello, > > On 05/27/2010 03:12 PM, Michael S. Tsirkin wrote: > >> I don't understand the reasons for this patch, but this doesn't matter. > > > > Depending on userspace application, driver can create a lot of work > > for a workqueue to handle. By making the workqueue thread > > belong in a cgroup, we make it possible to the CPU and other > > resources thus consumed. > > Hmmm.... I don't really get it. The unit of scheduling in workqueue > is a work. Yes. However, we use cgroups to limit when the workqueue itself is scheduled. This affects all of work done on this workqueue, so it's a bit of a blunt intrument. Thus we are not trying to apply this to all drivers, we intend to start with vhost-net. > Unless you're gonna convert every driver to use this > special kind of workqueue (and what happens when multiple tasks from > different cgroups share the driver?), We'll then create a workqueue per task. Each workqueue will have the right cgroup. But we are not trying to selve the problem for every driver. > I can't see how this is gonna be > useful. If you really wanna impose cgroup control on workqueue items, > you'll have to do it per work item which might lead to the problem of > priority inversion. Exactly. cgroup is per-workqueue not per work item. If driver wants to let administrators control priority for different kinds of items separately, driver will have to submit them to separate workqueues. > Can you please describe what you're trying to do > in more detail? > > Thank you. vhost-net driver is under control from userspace, it queues potentially a lot of work into the workqueue, which might load the system beyond the cgroup limits. And staying within cgroups limits is important for virtualization where vhost is used. > -- > tejun -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, May 27, 2010 at 09:24:18AM -0700, Sridhar Samudrala wrote: > On Thu, 2010-05-27 at 14:44 +0200, Oleg Nesterov wrote: > > On 05/27, Michael S. Tsirkin wrote: > > > > > > On Tue, May 18, 2010 at 05:04:51PM -0700, Sridhar Samudrala wrote: > > > > Add a new kernel API to create a singlethread workqueue and attach it's > > > > task to current task's cgroup and cpumask. > > > > > > > > Signed-off-by: Sridhar Samudrala <sri@us.ibm.com> > > > > > > Could someone familiar with workqueue code please comment on whether > > > this patch is suitable for 2.6.35? > > > > > > It is needed to fix the case where vhost user might cause a kernel > > > thread to consume more CPU than allowed by the cgroup. > > > Should I merge it through the vhost tree? > > > Ack for this? > > > > I don't understand the reasons for this patch, but this doesn't matter. > > > > I don't really see any need to change workqueue.c, > > > > > > +static struct task_struct *get_singlethread_wq_task(struct workqueue_struct *wq) > > > > +{ > > > > + return (per_cpu_ptr(wq->cpu_wq, singlethread_cpu))->thread; > > > > +} > > > > (Not sure this trivial static helper with the single caller makes sense, but > > see below) > > > > > > +/* Create a singlethread workqueue and attach it's task to the current task's > > > > + * cgroup and set it's cpumask to the current task's cpumask. > > > > + */ > > > > +struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name) > > > > +{ > > > > + struct workqueue_struct *wq; > > > > + struct task_struct *task; > > > > + cpumask_var_t mask; > > > > + > > > > + wq = create_singlethread_workqueue(name); > > > > + if (!wq) > > > > + goto out; > > > > + > > > > + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) > > > > + goto err; > > > > + > > > > + if (sched_getaffinity(current->pid, mask)) > > > > + goto err; > > > > + > > > > + task = get_singlethread_wq_task(wq); > > > > + if (sched_setaffinity(task->pid, mask)) > > > > + goto err; > > > > + > > > > + if (cgroup_attach_task_current_cg(task)) > > > > + goto err; > > > > +out: > > > > + return wq; > > > > +err: > > > > + destroy_workqueue(wq); > > > > + wq = NULL; > > > > + goto out; > > > > +} > > > > Instead, cgroup.c (or whoever needs this) can do > > > > struct move_struct { > > struct work_struct work; > > int ret; > > }; > > > > static void move_func(struct work_struct *work) > > { > > struct move_struct *move = container_of(...); > > > > if (cgroup_attach_task_current_cg(current)) > > We are trying to attach the task associated with workqueue to the > current task's cgroup. So what we need is > cgroup_attach_task_current_cg(wq->task); > > However there is no interface currently that exports the task associated > with a workqueue. It is hidden in cpu_workqueue_struct and is only > accessible within workqueue.c. > > > > ret = -EANY; > > } > > > > static struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name) > > { > > struct workqueue_struct *wq; > > struct move_struct move = { > > .work = __WORK_INITIALIZER(move_func); > > }; > > > > wq = create_singlethread_workqueue(name); > > if (!wq) > > return NULL; > > > > queue_work(&move.work); > > flush_work(&move.work); > > > > if (move.ret) { > > destroy_workqueue(wq); > > wq = NULL; > > } > > > > return wq; > > } > > > > Or. Just export wq_per_cpu() from workqueue.c (probably with a better name) and > > use it like the patch does. > This requires that struct cpu_workqueue_struct and struct > workqueue_struct are made externally visible by moving them to > kernel/workqueue.h. > > Instead what about adding the simple helper get_singlethread_wq_task() > in workqueue.c and exporting it. > I can add create_singlethread_workqueue_in_current_cg() to cgroup.c > using this helper routine. Or to our driver, if that's more palatable. > Thanks > Sridhar > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hello, On 05/27/2010 06:39 PM, Michael S. Tsirkin wrote: >> Unless you're gonna convert every driver to use this >> special kind of workqueue (and what happens when multiple tasks from >> different cgroups share the driver?), > > We'll then create a workqueue per task. Each workqueue will have the > right cgroup. But we are not trying to selve the problem for > every driver. Ah... I see. You're gonna use multiple workqueues. Once concern that I have is that this is abuse of workqueue interface to certain level and depends on the implementation detail of workqueue rather than its intended usage model. stop_machine() was a similar case and in the end it was better served by a different mechanism built on kthread directly (cpu_stop). Wouldn't it be cleaner to use kthread directly for your case too? You're basically trying to use workqueue as a frontend to kthread, so... Thanks.
What I am actually worried about is Tejun's rework, I am not sure cmwq has the "this thread services that wq" property... On 05/27, Sridhar Samudrala wrote: > > On Thu, 2010-05-27 at 14:44 +0200, Oleg Nesterov wrote: > > > > Instead, cgroup.c (or whoever needs this) can do > > > > struct move_struct { > > struct work_struct work; > > int ret; > > }; > > > > static void move_func(struct work_struct *work) > > { > > struct move_struct *move = container_of(...); > > > > if (cgroup_attach_task_current_cg(current)) > > We are trying to attach the task associated with workqueue to the > current task's cgroup. So what we need is > cgroup_attach_task_current_cg(wq->task); I do not see cgroup_attach_task_current_cg() in Linus's tree and thus I do not now what exactly it does, and of course the code above is only template. But I think this is easy. Just add "struct cgroup *cgrp" into move_struct and then move_func() can do cgroup_attach_task(move->cgrp, current) ? > > Or. Just export wq_per_cpu() from workqueue.c (probably with a better name) and > > use it like the patch does. > This requires that struct cpu_workqueue_struct and struct > workqueue_struct are made externally visible by moving them to > kernel/workqueue.h. no, no, > Instead what about adding the simple helper get_singlethread_wq_task() > in workqueue.c and exporting it. Indeed, this is what I meant. But. I disagree with get_singlethread_wq_task(). If we add this helper, it should work with the multi-threaded wq's too, and needs the "int cpu" parameter, ignored when is_wq_single_threaded(). So. Either rename wq_per_cpu() and export it (once again, I do not mean we should move the body to workqueue.h!), or create the new helper which just calls wq_per_cpu(). > I can add create_singlethread_workqueue_in_current_cg() to cgroup.c > using this helper routine. Imho, this is better. But please note that it is possible to do without any changes in workqueue.[ch] afaics, see above. Oleg. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, May 27, 2010 at 06:56:20PM +0200, Tejun Heo wrote: > Hello, > > On 05/27/2010 06:39 PM, Michael S. Tsirkin wrote: > >> Unless you're gonna convert every driver to use this > >> special kind of workqueue (and what happens when multiple tasks from > >> different cgroups share the driver?), > > > > We'll then create a workqueue per task. Each workqueue will have the > > right cgroup. But we are not trying to selve the problem for > > every driver. > > Ah... I see. You're gonna use multiple workqueues. Once concern that > I have is that this is abuse of workqueue interface to certain level > and depends on the implementation detail of workqueue rather than its > intended usage model. Well, this is why I proposed adding a new API for creating workqueue within workqueue.c, rather than exposing the task and attaching it to cgroups in our driver: so that workqueue maintainers can fix the implementation if it ever changes. And after all, it's an internal API, we can always change it later if we need. > stop_machine() was a similar case and in the > end it was better served by a different mechanism built on kthread > directly (cpu_stop). Wouldn't it be cleaner to use kthread directly > for your case too? You're basically trying to use workqueue as a > frontend to kthread, so... > > Thanks. Well, yes but we are using APIs like flush_work etc. These are very handy. It seems much easier than rolling our own queue on top of kthread. Makes sense? > -- > tejun -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hello, Michael. On 05/27/2010 07:32 PM, Michael S. Tsirkin wrote: > Well, this is why I proposed adding a new API for creating > workqueue within workqueue.c, rather than exposing the task > and attaching it to cgroups in our driver: so that workqueue > maintainers can fix the implementation if it ever changes. > > And after all, it's an internal API, we can always change > it later if we need. ... > Well, yes but we are using APIs like flush_work etc. These are very > handy. It seems much easier than rolling our own queue on top of kthread. The thing is that this kind of one-off usage becomes problemetic when you're trying to change the implementation detail. All current workqueue users don't care which thread they run on and they shouldn't as each work owns the context only for the duration the work is executing. If this sort of fundamental guidelines are followed, the implementation can be improved in pretty much transparent way but when you start depending on specific implementation details, things become messy pretty quickly. If this type of usage were more common, adding proper way to account work usage according to cgroups would make sense but that's not the case here and I removed the only exception case recently while trying to implement cmwq and if this is added. So, this would be the only one which makes such extra assumptions in the whole kernel. One way or the other, workqueue needs to be improved and I don't really think adding the single exception at this point is a good idea. The thing I realized after stop_machine conversion was that there was no reason to use workqueue there at all. There already are more than enough not-too-difficult synchronization constructs and if you're using a thread for dedicated purposes, code complexity isn't that different either way. Plus, it would also be clearer that dedicated threads are required there for what reason. So, I strongly suggest using a kthread. If there are issues which are noticeably difficult to solve with kthread, we can definitely talk about that and think about things again. Thank you.
On Thu, May 27, 2010 at 11:20:22PM +0200, Tejun Heo wrote: > Hello, Michael. > > On 05/27/2010 07:32 PM, Michael S. Tsirkin wrote: > > Well, this is why I proposed adding a new API for creating > > workqueue within workqueue.c, rather than exposing the task > > and attaching it to cgroups in our driver: so that workqueue > > maintainers can fix the implementation if it ever changes. > > > > And after all, it's an internal API, we can always change > > it later if we need. > ... > > Well, yes but we are using APIs like flush_work etc. These are very > > handy. It seems much easier than rolling our own queue on top of kthread. > > The thing is that this kind of one-off usage becomes problemetic when > you're trying to change the implementation detail. All current > workqueue users don't care which thread they run on and they shouldn't > as each work owns the context only for the duration the work is > executing. If this sort of fundamental guidelines are followed, the > implementation can be improved in pretty much transparent way but when > you start depending on specific implementation details, things become > messy pretty quickly. > > If this type of usage were more common, adding proper way to account > work usage according to cgroups would make sense but that's not the > case here and I removed the only exception case recently while trying > to implement cmwq and if this is added. So, this would be the only > one which makes such extra assumptions in the whole kernel. One way > or the other, workqueue needs to be improved and I don't really think > adding the single exception at this point is a good idea. > > The thing I realized after stop_machine conversion was that there was > no reason to use workqueue there at all. There already are more than > enough not-too-difficult synchronization constructs and if you're > using a thread for dedicated purposes, code complexity isn't that > different either way. Plus, it would also be clearer that dedicated > threads are required there for what reason. So, I strongly suggest > using a kthread. If there are issues which are noticeably difficult > to solve with kthread, we can definitely talk about that and think > about things again. > > Thank you. Well, we have create_singlethread_workqueue, right? This is not very different ... is it? Just copying structures and code from workqueue.c, adding vhost_ in front of it will definitely work: there is nothing magic about the workqueue library. But this just involves cut and paste which might be best avoided. One final idea before we go the cut and paste way: how about 'create_workqueue_from_task' that would get a thread and have workqueue run there? > -- > tejun -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hello, On 05/28/2010 05:08 PM, Michael S. Tsirkin wrote: > Well, we have create_singlethread_workqueue, right? > This is not very different ... is it? > > Just copying structures and code from workqueue.c, > adding vhost_ in front of it will definitely work: Sure it will, but you'll probably be able to get away with much less. > there is nothing magic about the workqueue library. > But this just involves cut and paste which might be best avoided. What I'm saying is that some magic needs to be added to workqueue and if you add this single(!) exception, it will have to be backed out pretty soon, so it would be better to do it properly now. > One final idea before we go the cut and paste way: how about > 'create_workqueue_from_task' that would get a thread and have workqueue > run there? You can currently depend on that implementation detail but it's not the workqueue interface is meant to do. The single threadedness is there as execution ordering and concurrency specification and it doesn't (or rather won't) necessarily mean that a specific single thread is bound to certain workqueue. Can you please direct me to have a look at the code. I'll be happy to do the conversion for you. Thanks.
On Fri, May 28, 2010 at 05:54:42PM +0200, Tejun Heo wrote: > Hello, > > On 05/28/2010 05:08 PM, Michael S. Tsirkin wrote: > > Well, we have create_singlethread_workqueue, right? > > This is not very different ... is it? > > > > Just copying structures and code from workqueue.c, > > adding vhost_ in front of it will definitely work: > > Sure it will, but you'll probably be able to get away with much less. > > > there is nothing magic about the workqueue library. > > But this just involves cut and paste which might be best avoided. > > What I'm saying is that some magic needs to be added to workqueue and > if you add this single(!) exception, it will have to be backed out > pretty soon, so it would be better to do it properly now. > > > One final idea before we go the cut and paste way: how about > > 'create_workqueue_from_task' that would get a thread and have workqueue > > run there? > > You can currently depend on that implementation detail but it's not > the workqueue interface is meant to do. The single threadedness is > there as execution ordering and concurrency specification and it > doesn't (or rather won't) necessarily mean that a specific single > thread is bound to certain workqueue. > > Can you please direct me to have a look at the code. I'll be happy to > do the conversion for you. Great, thanks! The code in question is in drivers/vhost/vhost.c It is used from drivers/vhost/net.c On top of this, we have patchset from Sridhar Samudrala, titled '0/3 Make vhost multi-threaded and associate each thread to its guest's cgroup': cgroups: Add an API to attach a task to current task's cgroup workqueue: Add an API to create a singlethread workqueue attached to the current task's cgroup vhost: make it more scalable by creating a vhost thread per device I have bounced the last three your way. > Thanks. > > -- > tejun -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 9466e86..6d6f301 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -211,6 +211,7 @@ __create_workqueue_key(const char *name, int singlethread, #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0) #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0) +extern struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name); extern void destroy_workqueue(struct workqueue_struct *wq); extern int queue_work(struct workqueue_struct *wq, struct work_struct *work); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5bfb213..6ba226e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -35,6 +35,7 @@ #include <linux/lockdep.h> #define CREATE_TRACE_POINTS #include <trace/events/workqueue.h> +#include <linux/cgroup.h> /* * The per-CPU workqueue (if single thread, we always use the first @@ -193,6 +194,45 @@ static const struct cpumask *cpu_singlethread_map __read_mostly; */ static cpumask_var_t cpu_populated_map __read_mostly; +static struct task_struct *get_singlethread_wq_task(struct workqueue_struct *wq) +{ + return (per_cpu_ptr(wq->cpu_wq, singlethread_cpu))->thread; +} + +/* Create a singlethread workqueue and attach it's task to the current task's + * cgroup and set it's cpumask to the current task's cpumask. + */ +struct workqueue_struct *create_singlethread_workqueue_in_current_cg(char *name) +{ + struct workqueue_struct *wq; + struct task_struct *task; + cpumask_var_t mask; + + wq = create_singlethread_workqueue(name); + if (!wq) + goto out; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + goto err; + + if (sched_getaffinity(current->pid, mask)) + goto err; + + task = get_singlethread_wq_task(wq); + if (sched_setaffinity(task->pid, mask)) + goto err; + + if (cgroup_attach_task_current_cg(task)) + goto err; +out: + return wq; +err: + destroy_workqueue(wq); + wq = NULL; + goto out; +} +EXPORT_SYMBOL_GPL(create_singlethread_workqueue_in_current_cg); + /* If it's single threaded, it isn't in the list of workqueues. */ static inline int is_wq_single_threaded(struct workqueue_struct *wq) {
Add a new kernel API to create a singlethread workqueue and attach it's task to current task's cgroup and cpumask. Signed-off-by: Sridhar Samudrala <sri@us.ibm.com> -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html