Message ID | 1316793027-18367-2-git-send-email-apw@canonical.com |
---|---|
State | New |
Headers | show |
On 09/23/2011 09:50 AM, Andy Whitcroft wrote: > During acpi_init we map and unmap a large number of mappings. The existing > code uses 'simple' RCU based free, triggering a full sychronize_rcu() > against the caller. If the machine is busy (for example unpacking the > initramfs) at the time this triggers a significant delay until all cpus > rendevous. Move those to a delayed RCU free callback. > > For our reference platform this cuts the average acpi_init time in half, > dropping boot time by .25s. > > Signed-off-by: Andy Whitcroft<apw@canonical.com> > --- > drivers/acpi/atomicio.c | 12 +++++++++--- > drivers/acpi/osl.c | 16 +++++++++++----- > 2 files changed, 20 insertions(+), 8 deletions(-) > > diff --git a/drivers/acpi/atomicio.c b/drivers/acpi/atomicio.c > index 7489b89..0a7a554 100644 > --- a/drivers/acpi/atomicio.c > +++ b/drivers/acpi/atomicio.c > @@ -49,6 +49,7 @@ struct acpi_iomap { > unsigned long size; > phys_addr_t paddr; > struct kref ref; > + struct rcu_head iomap_rcu; > }; > > /* acpi_iomaps_lock or RCU read lock must be held before calling */ > @@ -161,6 +162,13 @@ static void __acpi_kref_del_iomap(struct kref *ref) > * Used to post-unmap the specified IO memory area. The iounmap is > * done only if the reference count goes zero. > */ > +static void acpi_post_unmap_rcu(struct rcu_head *rcu) > +{ > + struct acpi_iomap *map = container_of(rcu, > + struct acpi_iomap, iomap_rcu); > + iounmap(map->vaddr); > + kfree(map); > +} > static void acpi_post_unmap(phys_addr_t paddr, unsigned long size) > { > struct acpi_iomap *map; > @@ -176,9 +184,7 @@ static void acpi_post_unmap(phys_addr_t paddr, unsigned long size) > if (!del) > return; > > - synchronize_rcu(); > - iounmap(map->vaddr); > - kfree(map); > + call_rcu(&map->iomap_rcu, acpi_post_unmap_rcu); > } > > /* In NMI handler, should set silent = 1 */ > diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c > index 372f9b7..9b3a520 100644 > --- a/drivers/acpi/osl.c > +++ b/drivers/acpi/osl.c > @@ -105,6 +105,7 @@ struct acpi_ioremap { > acpi_physical_address phys; > acpi_size size; > unsigned long refcount; > + struct rcu_head map_rcu; > }; > > static LIST_HEAD(acpi_ioremaps); > @@ -373,13 +374,18 @@ static void acpi_os_drop_map_ref(struct acpi_ioremap *map) > list_del_rcu(&map->list); > } > > +static void acpi_os_map_cleanup_rcu(struct rcu_head *rcu) > +{ > + struct acpi_ioremap *map = container_of(rcu, > + struct acpi_ioremap, map_rcu); > + iounmap(map->virt); > + kfree(map); > +} > + > static void acpi_os_map_cleanup(struct acpi_ioremap *map) > { > - if (!map->refcount) { > - synchronize_rcu(); > - iounmap(map->virt); > - kfree(map); > - } > + if (!map->refcount) > + call_rcu(&map->map_rcu, acpi_os_map_cleanup_rcu); > } > > void __ref acpi_os_unmap_memory(void __iomem *virt, acpi_size size) Should this be 'UBUNTU: SAUCE:' until its upstreamed (which I think is a reasonable thing to do)? Acked-by: Tim Gardner <tim.gardner@canonical.com>
On Fri, Sep 23, 2011 at 10:23:03AM -0600, Tim Gardner wrote: > On 09/23/2011 09:50 AM, Andy Whitcroft wrote: > >During acpi_init we map and unmap a large number of mappings. The existing > >code uses 'simple' RCU based free, triggering a full sychronize_rcu() > >against the caller. If the machine is busy (for example unpacking the > >initramfs) at the time this triggers a significant delay until all cpus > >rendevous. Move those to a delayed RCU free callback. > > > >For our reference platform this cuts the average acpi_init time in half, > >dropping boot time by .25s. > > > >Signed-off-by: Andy Whitcroft<apw@canonical.com> > >--- > > drivers/acpi/atomicio.c | 12 +++++++++--- > > drivers/acpi/osl.c | 16 +++++++++++----- > > 2 files changed, 20 insertions(+), 8 deletions(-) > > > >diff --git a/drivers/acpi/atomicio.c b/drivers/acpi/atomicio.c > >index 7489b89..0a7a554 100644 > >--- a/drivers/acpi/atomicio.c > >+++ b/drivers/acpi/atomicio.c > >@@ -49,6 +49,7 @@ struct acpi_iomap { > > unsigned long size; > > phys_addr_t paddr; > > struct kref ref; > >+ struct rcu_head iomap_rcu; > > }; > > > > /* acpi_iomaps_lock or RCU read lock must be held before calling */ > >@@ -161,6 +162,13 @@ static void __acpi_kref_del_iomap(struct kref *ref) > > * Used to post-unmap the specified IO memory area. The iounmap is > > * done only if the reference count goes zero. > > */ > >+static void acpi_post_unmap_rcu(struct rcu_head *rcu) > >+{ > >+ struct acpi_iomap *map = container_of(rcu, > >+ struct acpi_iomap, iomap_rcu); > >+ iounmap(map->vaddr); > >+ kfree(map); > >+} > > static void acpi_post_unmap(phys_addr_t paddr, unsigned long size) > > { > > struct acpi_iomap *map; > >@@ -176,9 +184,7 @@ static void acpi_post_unmap(phys_addr_t paddr, unsigned long size) > > if (!del) > > return; > > > >- synchronize_rcu(); > >- iounmap(map->vaddr); > >- kfree(map); > >+ call_rcu(&map->iomap_rcu, acpi_post_unmap_rcu); > > } > > > > /* In NMI handler, should set silent = 1 */ > >diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c > >index 372f9b7..9b3a520 100644 > >--- a/drivers/acpi/osl.c > >+++ b/drivers/acpi/osl.c > >@@ -105,6 +105,7 @@ struct acpi_ioremap { > > acpi_physical_address phys; > > acpi_size size; > > unsigned long refcount; > >+ struct rcu_head map_rcu; > > }; > > > > static LIST_HEAD(acpi_ioremaps); > >@@ -373,13 +374,18 @@ static void acpi_os_drop_map_ref(struct acpi_ioremap *map) > > list_del_rcu(&map->list); > > } > > > >+static void acpi_os_map_cleanup_rcu(struct rcu_head *rcu) > >+{ > >+ struct acpi_ioremap *map = container_of(rcu, > >+ struct acpi_ioremap, map_rcu); > >+ iounmap(map->virt); > >+ kfree(map); > >+} > >+ > > static void acpi_os_map_cleanup(struct acpi_ioremap *map) > > { > >- if (!map->refcount) { > >- synchronize_rcu(); > >- iounmap(map->virt); > >- kfree(map); > >- } > >+ if (!map->refcount) > >+ call_rcu(&map->map_rcu, acpi_os_map_cleanup_rcu); > > } > > > > void __ref acpi_os_unmap_memory(void __iomem *virt, acpi_size size) > > Should this be 'UBUNTU: SAUCE:' until its upstreamed (which I think > is a reasonable thing to do)? Yes it should. > Acked-by: Tim Gardner <tim.gardner@canonical.com> -apw
On 09/23/2011 08:50 AM, Andy Whitcroft wrote: > During acpi_init we map and unmap a large number of mappings. The existing > code uses 'simple' RCU based free, triggering a full sychronize_rcu() > against the caller. If the machine is busy (for example unpacking the > initramfs) at the time this triggers a significant delay until all cpus > rendevous. Move those to a delayed RCU free callback. > > For our reference platform this cuts the average acpi_init time in half, > dropping boot time by .25s. > > Signed-off-by: Andy Whitcroft <apw@canonical.com> looks good Acked-by: John Johansen <john.johansen@canonical.com> > --- > drivers/acpi/atomicio.c | 12 +++++++++--- > drivers/acpi/osl.c | 16 +++++++++++----- > 2 files changed, 20 insertions(+), 8 deletions(-) > > diff --git a/drivers/acpi/atomicio.c b/drivers/acpi/atomicio.c > index 7489b89..0a7a554 100644 > --- a/drivers/acpi/atomicio.c > +++ b/drivers/acpi/atomicio.c > @@ -49,6 +49,7 @@ struct acpi_iomap { > unsigned long size; > phys_addr_t paddr; > struct kref ref; > + struct rcu_head iomap_rcu; > }; > > /* acpi_iomaps_lock or RCU read lock must be held before calling */ > @@ -161,6 +162,13 @@ static void __acpi_kref_del_iomap(struct kref *ref) > * Used to post-unmap the specified IO memory area. The iounmap is > * done only if the reference count goes zero. > */ > +static void acpi_post_unmap_rcu(struct rcu_head *rcu) > +{ > + struct acpi_iomap *map = container_of(rcu, > + struct acpi_iomap, iomap_rcu); > + iounmap(map->vaddr); > + kfree(map); > +} > static void acpi_post_unmap(phys_addr_t paddr, unsigned long size) > { > struct acpi_iomap *map; > @@ -176,9 +184,7 @@ static void acpi_post_unmap(phys_addr_t paddr, unsigned long size) > if (!del) > return; > > - synchronize_rcu(); > - iounmap(map->vaddr); > - kfree(map); > + call_rcu(&map->iomap_rcu, acpi_post_unmap_rcu); > } > > /* In NMI handler, should set silent = 1 */ > diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c > index 372f9b7..9b3a520 100644 > --- a/drivers/acpi/osl.c > +++ b/drivers/acpi/osl.c > @@ -105,6 +105,7 @@ struct acpi_ioremap { > acpi_physical_address phys; > acpi_size size; > unsigned long refcount; > + struct rcu_head map_rcu; > }; > > static LIST_HEAD(acpi_ioremaps); > @@ -373,13 +374,18 @@ static void acpi_os_drop_map_ref(struct acpi_ioremap *map) > list_del_rcu(&map->list); > } > > +static void acpi_os_map_cleanup_rcu(struct rcu_head *rcu) > +{ > + struct acpi_ioremap *map = container_of(rcu, > + struct acpi_ioremap, map_rcu); > + iounmap(map->virt); > + kfree(map); > +} > + > static void acpi_os_map_cleanup(struct acpi_ioremap *map) > { > - if (!map->refcount) { > - synchronize_rcu(); > - iounmap(map->virt); > - kfree(map); > - } > + if (!map->refcount) > + call_rcu(&map->map_rcu, acpi_os_map_cleanup_rcu); > } > > void __ref acpi_os_unmap_memory(void __iomem *virt, acpi_size size)
On Fri, 2011-09-23 at 16:50 +0100, Andy Whitcroft wrote: > During acpi_init we map and unmap a large number of mappings. The existing > code uses 'simple' RCU based free, triggering a full sychronize_rcu() > against the caller. If the machine is busy (for example unpacking the > initramfs) at the time this triggers a significant delay until all cpus > rendevous. Move those to a delayed RCU free callback. > > For our reference platform this cuts the average acpi_init time in half, > dropping boot time by .25s. > > Signed-off-by: Andy Whitcroft <apw@canonical.com> Acked-by: Leann Ogasawara <leann.ogasawara@canonical.com> Applied to Oneiric master-next. As you noted, this is a performance improvement so I'm going to let it bake in master-next until the first Oneiric SRU. Thanks, Leann > --- > drivers/acpi/atomicio.c | 12 +++++++++--- > drivers/acpi/osl.c | 16 +++++++++++----- > 2 files changed, 20 insertions(+), 8 deletions(-) > > diff --git a/drivers/acpi/atomicio.c b/drivers/acpi/atomicio.c > index 7489b89..0a7a554 100644 > --- a/drivers/acpi/atomicio.c > +++ b/drivers/acpi/atomicio.c > @@ -49,6 +49,7 @@ struct acpi_iomap { > unsigned long size; > phys_addr_t paddr; > struct kref ref; > + struct rcu_head iomap_rcu; > }; > > /* acpi_iomaps_lock or RCU read lock must be held before calling */ > @@ -161,6 +162,13 @@ static void __acpi_kref_del_iomap(struct kref *ref) > * Used to post-unmap the specified IO memory area. The iounmap is > * done only if the reference count goes zero. > */ > +static void acpi_post_unmap_rcu(struct rcu_head *rcu) > +{ > + struct acpi_iomap *map = container_of(rcu, > + struct acpi_iomap, iomap_rcu); > + iounmap(map->vaddr); > + kfree(map); > +} > static void acpi_post_unmap(phys_addr_t paddr, unsigned long size) > { > struct acpi_iomap *map; > @@ -176,9 +184,7 @@ static void acpi_post_unmap(phys_addr_t paddr, unsigned long size) > if (!del) > return; > > - synchronize_rcu(); > - iounmap(map->vaddr); > - kfree(map); > + call_rcu(&map->iomap_rcu, acpi_post_unmap_rcu); > } > > /* In NMI handler, should set silent = 1 */ > diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c > index 372f9b7..9b3a520 100644 > --- a/drivers/acpi/osl.c > +++ b/drivers/acpi/osl.c > @@ -105,6 +105,7 @@ struct acpi_ioremap { > acpi_physical_address phys; > acpi_size size; > unsigned long refcount; > + struct rcu_head map_rcu; > }; > > static LIST_HEAD(acpi_ioremaps); > @@ -373,13 +374,18 @@ static void acpi_os_drop_map_ref(struct acpi_ioremap *map) > list_del_rcu(&map->list); > } > > +static void acpi_os_map_cleanup_rcu(struct rcu_head *rcu) > +{ > + struct acpi_ioremap *map = container_of(rcu, > + struct acpi_ioremap, map_rcu); > + iounmap(map->virt); > + kfree(map); > +} > + > static void acpi_os_map_cleanup(struct acpi_ioremap *map) > { > - if (!map->refcount) { > - synchronize_rcu(); > - iounmap(map->virt); > - kfree(map); > - } > + if (!map->refcount) > + call_rcu(&map->map_rcu, acpi_os_map_cleanup_rcu); > } > > void __ref acpi_os_unmap_memory(void __iomem *virt, acpi_size size) > -- > 1.7.4.1 > >
diff --git a/drivers/acpi/atomicio.c b/drivers/acpi/atomicio.c index 7489b89..0a7a554 100644 --- a/drivers/acpi/atomicio.c +++ b/drivers/acpi/atomicio.c @@ -49,6 +49,7 @@ struct acpi_iomap { unsigned long size; phys_addr_t paddr; struct kref ref; + struct rcu_head iomap_rcu; }; /* acpi_iomaps_lock or RCU read lock must be held before calling */ @@ -161,6 +162,13 @@ static void __acpi_kref_del_iomap(struct kref *ref) * Used to post-unmap the specified IO memory area. The iounmap is * done only if the reference count goes zero. */ +static void acpi_post_unmap_rcu(struct rcu_head *rcu) +{ + struct acpi_iomap *map = container_of(rcu, + struct acpi_iomap, iomap_rcu); + iounmap(map->vaddr); + kfree(map); +} static void acpi_post_unmap(phys_addr_t paddr, unsigned long size) { struct acpi_iomap *map; @@ -176,9 +184,7 @@ static void acpi_post_unmap(phys_addr_t paddr, unsigned long size) if (!del) return; - synchronize_rcu(); - iounmap(map->vaddr); - kfree(map); + call_rcu(&map->iomap_rcu, acpi_post_unmap_rcu); } /* In NMI handler, should set silent = 1 */ diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 372f9b7..9b3a520 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -105,6 +105,7 @@ struct acpi_ioremap { acpi_physical_address phys; acpi_size size; unsigned long refcount; + struct rcu_head map_rcu; }; static LIST_HEAD(acpi_ioremaps); @@ -373,13 +374,18 @@ static void acpi_os_drop_map_ref(struct acpi_ioremap *map) list_del_rcu(&map->list); } +static void acpi_os_map_cleanup_rcu(struct rcu_head *rcu) +{ + struct acpi_ioremap *map = container_of(rcu, + struct acpi_ioremap, map_rcu); + iounmap(map->virt); + kfree(map); +} + static void acpi_os_map_cleanup(struct acpi_ioremap *map) { - if (!map->refcount) { - synchronize_rcu(); - iounmap(map->virt); - kfree(map); - } + if (!map->refcount) + call_rcu(&map->map_rcu, acpi_os_map_cleanup_rcu); } void __ref acpi_os_unmap_memory(void __iomem *virt, acpi_size size)
During acpi_init we map and unmap a large number of mappings. The existing code uses 'simple' RCU based free, triggering a full sychronize_rcu() against the caller. If the machine is busy (for example unpacking the initramfs) at the time this triggers a significant delay until all cpus rendevous. Move those to a delayed RCU free callback. For our reference platform this cuts the average acpi_init time in half, dropping boot time by .25s. Signed-off-by: Andy Whitcroft <apw@canonical.com> --- drivers/acpi/atomicio.c | 12 +++++++++--- drivers/acpi/osl.c | 16 +++++++++++----- 2 files changed, 20 insertions(+), 8 deletions(-)