diff mbox series

[SRU,F,1/1] UBUNTU: SAUCE: overlayfs: fix reference count mismatch

Message ID 20230622093731.632666-3-andrea.righi@canonical.com
State New
Headers show
Series [SRU,F,1/1] UBUNTU: SAUCE: overlayfs: fix reference count mismatch | expand

Commit Message

Andrea Righi June 22, 2023, 9:37 a.m. UTC
BugLink: https://bugs.launchpad.net/bugs/2016398

Opened files reported in /proc/pid/map_files can be shows with the wrong
mount point using overlayfs with filesystem namspaces.

This incorrect behavior is fixed:

  UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files

However, the fix introduced a new regression, the reference to the
original file stored in vma->vm_prfile is not properly released when
vma->vm_prfile is replaced with a new file.

This can cause a reference counter unbalance, leading errors such as
"target is busy" when trying to unmount overlayfs, even if the
filesystem has not active reference.

Fix by properly releasing the original file stored in vm_prfile.

Fixes: 508fdae3f62dd ("UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files")
Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
---
 fs/overlayfs/file.c | 59 +++++++++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 23 deletions(-)

Comments

Alexander Mikhalitsyn June 22, 2023, 10:14 a.m. UTC | #1
On Thu, Jun 22, 2023 at 11:38 AM Andrea Righi
<andrea.righi@canonical.com> wrote:
>
> BugLink: https://bugs.launchpad.net/bugs/2016398
>
> Opened files reported in /proc/pid/map_files can be shows with the wrong
> mount point using overlayfs with filesystem namspaces.
>
> This incorrect behavior is fixed:
>
>   UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files
>
> However, the fix introduced a new regression, the reference to the
> original file stored in vma->vm_prfile is not properly released when
> vma->vm_prfile is replaced with a new file.
>
> This can cause a reference counter unbalance, leading errors such as
> "target is busy" when trying to unmount overlayfs, even if the
> filesystem has not active reference.
>
> Fix by properly releasing the original file stored in vm_prfile.
>
> Fixes: 508fdae3f62dd ("UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files")
> Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
> ---
>  fs/overlayfs/file.c | 59 +++++++++++++++++++++++++++------------------
>  1 file changed, 36 insertions(+), 23 deletions(-)
>
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index 366a4267d5f8..565010d2c9dd 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -373,17 +373,48 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
>         return ret;
>  }
>
> -/* handle vma->vm_prfile */
> +/*
> + * In map_files_get_link() (fs/proc/base.c)
> + * we need to determine correct path from overlayfs.
> + * But real_mount(realfile->f_path.mnt) may be not
> + * equal to real_mount(file->f_path.mnt). In such case
> + * fdinfo of the same file which was opened from
> + * /proc/<pid>/map_files/... and "usual" path
> + * will show different mnt_id.
> + *
> + * We solve issue like in aufs by using additional
> + * field on struct vm_area_struct called "vm_prfile"
> + * which is used only for fdinfo/"printing" needs.
> + *
> + * See also mm/prfile.c
> + */
> +#ifdef CONFIG_MMU
>  static void ovl_vm_prfile_set(struct vm_area_struct *vma,
>                               struct file *file)
>  {
>         get_file(file);
> -       vma->vm_prfile = file;
> -#ifndef CONFIG_MMU
> +       swap(vma->vm_prfile, file);
> +       /* Drop reference count from previous file value */
> +       if (file)
> +               fput(file);
> +}
> +#else
> +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> +                             struct file *file)
> +{
> +       struct file *vm_region_file = file;
> +
>         get_file(file);
> -       vma->vm_region->vm_prfile = file;
> -#endif
> +       get_file(vm_region_file);
> +       swap(vma->vm_prfile, file);
> +       swap(vma->vm_region->vm_prfile, vm_region_file);
> +       /* Drop reference count from previous file values */
> +       if (file)
> +               fput(file);
> +       if (vm_region_file)
> +               fput(vm_region_file);
>  }
> +#endif
>
>  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
>  {
> @@ -411,25 +442,7 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
>                 vma->vm_file = file;
>                 fput(realfile);
>         } else {
> -               /*
> -                * In map_files_get_link() (fs/proc/base.c)
> -                * we need to determine correct path from overlayfs.
> -                * But real_mount(realfile->f_path.mnt) may be not
> -                * equal to real_mount(file->f_path.mnt). In such case
> -                * fdinfo of the same file which was opened from
> -                * /proc/<pid>/map_files/... and "usual" path
> -                * will show different mnt_id.
> -                *
> -                * We solve issue like in aufs by using additional
> -                * field on struct vm_area_struct called "vm_prfile"
> -                * which is used only for fdinfo/"printing" needs.
> -                *
> -                * See also mm/prfile.c
> -                */
>                 ovl_vm_prfile_set(vma, file);
> -
> -               /* Drop reference count from previous vm_file value */
> -               fput(file);
>         }
>
>         ovl_file_accessed(file);
> --
> 2.40.1
>
>
> --

Dear Andrea,

I'm almost sure that Focal is not affected by this issue. This issue
appeared after rebasing to a newer kernel version.
Have you checked that Focal is affected?

Kind regards,
Alex

> kernel-team mailing list
> kernel-team@lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team
Andrea Righi June 22, 2023, 12:20 p.m. UTC | #2
On Thu, Jun 22, 2023 at 12:14:26PM +0200, Aleksandr Mikhalitsyn wrote:
> On Thu, Jun 22, 2023 at 11:38 AM Andrea Righi
> <andrea.righi@canonical.com> wrote:
> >
> > BugLink: https://bugs.launchpad.net/bugs/2016398
> >
> > Opened files reported in /proc/pid/map_files can be shows with the wrong
> > mount point using overlayfs with filesystem namspaces.
> >
> > This incorrect behavior is fixed:
> >
> >   UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files
> >
> > However, the fix introduced a new regression, the reference to the
> > original file stored in vma->vm_prfile is not properly released when
> > vma->vm_prfile is replaced with a new file.
> >
> > This can cause a reference counter unbalance, leading errors such as
> > "target is busy" when trying to unmount overlayfs, even if the
> > filesystem has not active reference.
> >
> > Fix by properly releasing the original file stored in vm_prfile.
> >
> > Fixes: 508fdae3f62dd ("UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files")
> > Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
> > ---
> >  fs/overlayfs/file.c | 59 +++++++++++++++++++++++++++------------------
> >  1 file changed, 36 insertions(+), 23 deletions(-)
> >
> > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > index 366a4267d5f8..565010d2c9dd 100644
> > --- a/fs/overlayfs/file.c
> > +++ b/fs/overlayfs/file.c
> > @@ -373,17 +373,48 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> >         return ret;
> >  }
> >
> > -/* handle vma->vm_prfile */
> > +/*
> > + * In map_files_get_link() (fs/proc/base.c)
> > + * we need to determine correct path from overlayfs.
> > + * But real_mount(realfile->f_path.mnt) may be not
> > + * equal to real_mount(file->f_path.mnt). In such case
> > + * fdinfo of the same file which was opened from
> > + * /proc/<pid>/map_files/... and "usual" path
> > + * will show different mnt_id.
> > + *
> > + * We solve issue like in aufs by using additional
> > + * field on struct vm_area_struct called "vm_prfile"
> > + * which is used only for fdinfo/"printing" needs.
> > + *
> > + * See also mm/prfile.c
> > + */
> > +#ifdef CONFIG_MMU
> >  static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> >                               struct file *file)
> >  {
> >         get_file(file);
> > -       vma->vm_prfile = file;
> > -#ifndef CONFIG_MMU
> > +       swap(vma->vm_prfile, file);
> > +       /* Drop reference count from previous file value */
> > +       if (file)
> > +               fput(file);
> > +}
> > +#else
> > +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > +                             struct file *file)
> > +{
> > +       struct file *vm_region_file = file;
> > +
> >         get_file(file);
> > -       vma->vm_region->vm_prfile = file;
> > -#endif
> > +       get_file(vm_region_file);
> > +       swap(vma->vm_prfile, file);
> > +       swap(vma->vm_region->vm_prfile, vm_region_file);
> > +       /* Drop reference count from previous file values */
> > +       if (file)
> > +               fput(file);
> > +       if (vm_region_file)
> > +               fput(vm_region_file);
> >  }
> > +#endif
> >
> >  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> >  {
> > @@ -411,25 +442,7 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> >                 vma->vm_file = file;
> >                 fput(realfile);
> >         } else {
> > -               /*
> > -                * In map_files_get_link() (fs/proc/base.c)
> > -                * we need to determine correct path from overlayfs.
> > -                * But real_mount(realfile->f_path.mnt) may be not
> > -                * equal to real_mount(file->f_path.mnt). In such case
> > -                * fdinfo of the same file which was opened from
> > -                * /proc/<pid>/map_files/... and "usual" path
> > -                * will show different mnt_id.
> > -                *
> > -                * We solve issue like in aufs by using additional
> > -                * field on struct vm_area_struct called "vm_prfile"
> > -                * which is used only for fdinfo/"printing" needs.
> > -                *
> > -                * See also mm/prfile.c
> > -                */
> >                 ovl_vm_prfile_set(vma, file);
> > -
> > -               /* Drop reference count from previous vm_file value */
> > -               fput(file);
> >         }
> >
> >         ovl_file_accessed(file);
> > --
> > 2.40.1
> >
> >
> > --
> 
> Dear Andrea,
> 
> I'm almost sure that Focal is not affected by this issue. This issue
> appeared after rebasing to a newer kernel version.
> Have you checked that Focal is affected?

Hi Alex,

I've just checked and focal doesn't seem to be affected (at least the
PoC doesn't trigger the problem).

However, looking at what we're doing here:

static void ovl_vm_prfile_set(struct vm_area_struct *vma,
                              struct file *file)
{
        get_file(file);
        vma->vm_prfile = file;
...

We just blindly overwrite vma->vm_prfile without releasing what could
have been potentially stored there, and maybe that's totally safe in
focal, but I'm not 100% sure that it's always safe.

I'd rather do this:

        get_file(file);
        swap(vma->vm_prfile, file);
        /* Drop reference count from previous file value */
        if (file)
                fput(file);

And in this way the code is also consistent with the other kernels.

What do you think?

-Andrea
Alexander Mikhalitsyn June 22, 2023, 12:47 p.m. UTC | #3
On Thu, Jun 22, 2023 at 2:20 PM Andrea Righi <andrea.righi@canonical.com> wrote:
>
> On Thu, Jun 22, 2023 at 12:14:26PM +0200, Aleksandr Mikhalitsyn wrote:
> > On Thu, Jun 22, 2023 at 11:38 AM Andrea Righi
> > <andrea.righi@canonical.com> wrote:
> > >
> > > BugLink: https://bugs.launchpad.net/bugs/2016398
> > >
> > > Opened files reported in /proc/pid/map_files can be shows with the wrong
> > > mount point using overlayfs with filesystem namspaces.
> > >
> > > This incorrect behavior is fixed:
> > >
> > >   UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files
> > >
> > > However, the fix introduced a new regression, the reference to the
> > > original file stored in vma->vm_prfile is not properly released when
> > > vma->vm_prfile is replaced with a new file.
> > >
> > > This can cause a reference counter unbalance, leading errors such as
> > > "target is busy" when trying to unmount overlayfs, even if the
> > > filesystem has not active reference.
> > >
> > > Fix by properly releasing the original file stored in vm_prfile.
> > >
> > > Fixes: 508fdae3f62dd ("UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files")
> > > Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
> > > ---
> > >  fs/overlayfs/file.c | 59 +++++++++++++++++++++++++++------------------
> > >  1 file changed, 36 insertions(+), 23 deletions(-)
> > >
> > > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > > index 366a4267d5f8..565010d2c9dd 100644
> > > --- a/fs/overlayfs/file.c
> > > +++ b/fs/overlayfs/file.c
> > > @@ -373,17 +373,48 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> > >         return ret;
> > >  }
> > >
> > > -/* handle vma->vm_prfile */
> > > +/*
> > > + * In map_files_get_link() (fs/proc/base.c)
> > > + * we need to determine correct path from overlayfs.
> > > + * But real_mount(realfile->f_path.mnt) may be not
> > > + * equal to real_mount(file->f_path.mnt). In such case
> > > + * fdinfo of the same file which was opened from
> > > + * /proc/<pid>/map_files/... and "usual" path
> > > + * will show different mnt_id.
> > > + *
> > > + * We solve issue like in aufs by using additional
> > > + * field on struct vm_area_struct called "vm_prfile"
> > > + * which is used only for fdinfo/"printing" needs.
> > > + *
> > > + * See also mm/prfile.c
> > > + */
> > > +#ifdef CONFIG_MMU
> > >  static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > >                               struct file *file)
> > >  {
> > >         get_file(file);
> > > -       vma->vm_prfile = file;
> > > -#ifndef CONFIG_MMU
> > > +       swap(vma->vm_prfile, file);
> > > +       /* Drop reference count from previous file value */
> > > +       if (file)
> > > +               fput(file);
> > > +}
> > > +#else
> > > +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > +                             struct file *file)
> > > +{
> > > +       struct file *vm_region_file = file;
> > > +
> > >         get_file(file);
> > > -       vma->vm_region->vm_prfile = file;
> > > -#endif
> > > +       get_file(vm_region_file);
> > > +       swap(vma->vm_prfile, file);
> > > +       swap(vma->vm_region->vm_prfile, vm_region_file);
> > > +       /* Drop reference count from previous file values */
> > > +       if (file)
> > > +               fput(file);
> > > +       if (vm_region_file)
> > > +               fput(vm_region_file);
> > >  }
> > > +#endif
> > >
> > >  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> > >  {
> > > @@ -411,25 +442,7 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> > >                 vma->vm_file = file;
> > >                 fput(realfile);
> > >         } else {
> > > -               /*
> > > -                * In map_files_get_link() (fs/proc/base.c)
> > > -                * we need to determine correct path from overlayfs.
> > > -                * But real_mount(realfile->f_path.mnt) may be not
> > > -                * equal to real_mount(file->f_path.mnt). In such case
> > > -                * fdinfo of the same file which was opened from
> > > -                * /proc/<pid>/map_files/... and "usual" path
> > > -                * will show different mnt_id.
> > > -                *
> > > -                * We solve issue like in aufs by using additional
> > > -                * field on struct vm_area_struct called "vm_prfile"
> > > -                * which is used only for fdinfo/"printing" needs.
> > > -                *
> > > -                * See also mm/prfile.c
> > > -                */
> > >                 ovl_vm_prfile_set(vma, file);
> > > -
> > > -               /* Drop reference count from previous vm_file value */
> > > -               fput(file);
> > >         }
> > >
> > >         ovl_file_accessed(file);
> > > --
> > > 2.40.1
> > >
> > >
> > > --
> >
> > Dear Andrea,
> >
> > I'm almost sure that Focal is not affected by this issue. This issue
> > appeared after rebasing to a newer kernel version.
> > Have you checked that Focal is affected?
>
> Hi Alex,
>
> I've just checked and focal doesn't seem to be affected (at least the
> PoC doesn't trigger the problem).

Thanks!

>
> However, looking at what we're doing here:
>
> static void ovl_vm_prfile_set(struct vm_area_struct *vma,
>                               struct file *file)
> {
>         get_file(file);
>         vma->vm_prfile = file;
> ...
>
> We just blindly overwrite vma->vm_prfile without releasing what could
> have been potentially stored there, and maybe that's totally safe in
> focal, but I'm not 100% sure that it's always safe.
>
> I'd rather do this:
>
>         get_file(file);
>         swap(vma->vm_prfile, file);
>         /* Drop reference count from previous file value */
>         if (file)
>                 fput(file);
>
> And in this way the code is also consistent with the other kernels.
>
> What do you think?

It's an interesting point. Seems like this can only be reproduced when
one overlayfs is stacked with another one overlayfs.
Usually, vma->vm_prfile is NULL when we enter the ovl_vm_prfile_set function.

If you don't mind, I'll take a close look at this place on the weekend
and check everything. I know that this place is very-very tricky and
we
have made a few bugs right there before :) Overlayfs is very popular
and I'm afraid that we can affect many users.

The same applies to the Jammy/Kinetic and other trees.

If you wanted to merge this really quick and this can't wait please
tell me. I'll try to take a closer look at that closely today evening
or tomorrow then.

Kind regards,
Alex

>
> -Andrea
Andrea Righi June 22, 2023, 12:56 p.m. UTC | #4
On Thu, Jun 22, 2023 at 02:47:03PM +0200, Aleksandr Mikhalitsyn wrote:
> On Thu, Jun 22, 2023 at 2:20 PM Andrea Righi <andrea.righi@canonical.com> wrote:
> >
> > On Thu, Jun 22, 2023 at 12:14:26PM +0200, Aleksandr Mikhalitsyn wrote:
> > > On Thu, Jun 22, 2023 at 11:38 AM Andrea Righi
> > > <andrea.righi@canonical.com> wrote:
> > > >
> > > > BugLink: https://bugs.launchpad.net/bugs/2016398
> > > >
> > > > Opened files reported in /proc/pid/map_files can be shows with the wrong
> > > > mount point using overlayfs with filesystem namspaces.
> > > >
> > > > This incorrect behavior is fixed:
> > > >
> > > >   UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files
> > > >
> > > > However, the fix introduced a new regression, the reference to the
> > > > original file stored in vma->vm_prfile is not properly released when
> > > > vma->vm_prfile is replaced with a new file.
> > > >
> > > > This can cause a reference counter unbalance, leading errors such as
> > > > "target is busy" when trying to unmount overlayfs, even if the
> > > > filesystem has not active reference.
> > > >
> > > > Fix by properly releasing the original file stored in vm_prfile.
> > > >
> > > > Fixes: 508fdae3f62dd ("UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files")
> > > > Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
> > > > ---
> > > >  fs/overlayfs/file.c | 59 +++++++++++++++++++++++++++------------------
> > > >  1 file changed, 36 insertions(+), 23 deletions(-)
> > > >
> > > > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > > > index 366a4267d5f8..565010d2c9dd 100644
> > > > --- a/fs/overlayfs/file.c
> > > > +++ b/fs/overlayfs/file.c
> > > > @@ -373,17 +373,48 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> > > >         return ret;
> > > >  }
> > > >
> > > > -/* handle vma->vm_prfile */
> > > > +/*
> > > > + * In map_files_get_link() (fs/proc/base.c)
> > > > + * we need to determine correct path from overlayfs.
> > > > + * But real_mount(realfile->f_path.mnt) may be not
> > > > + * equal to real_mount(file->f_path.mnt). In such case
> > > > + * fdinfo of the same file which was opened from
> > > > + * /proc/<pid>/map_files/... and "usual" path
> > > > + * will show different mnt_id.
> > > > + *
> > > > + * We solve issue like in aufs by using additional
> > > > + * field on struct vm_area_struct called "vm_prfile"
> > > > + * which is used only for fdinfo/"printing" needs.
> > > > + *
> > > > + * See also mm/prfile.c
> > > > + */
> > > > +#ifdef CONFIG_MMU
> > > >  static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > >                               struct file *file)
> > > >  {
> > > >         get_file(file);
> > > > -       vma->vm_prfile = file;
> > > > -#ifndef CONFIG_MMU
> > > > +       swap(vma->vm_prfile, file);
> > > > +       /* Drop reference count from previous file value */
> > > > +       if (file)
> > > > +               fput(file);
> > > > +}
> > > > +#else
> > > > +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > > +                             struct file *file)
> > > > +{
> > > > +       struct file *vm_region_file = file;
> > > > +
> > > >         get_file(file);
> > > > -       vma->vm_region->vm_prfile = file;
> > > > -#endif
> > > > +       get_file(vm_region_file);
> > > > +       swap(vma->vm_prfile, file);
> > > > +       swap(vma->vm_region->vm_prfile, vm_region_file);
> > > > +       /* Drop reference count from previous file values */
> > > > +       if (file)
> > > > +               fput(file);
> > > > +       if (vm_region_file)
> > > > +               fput(vm_region_file);
> > > >  }
> > > > +#endif
> > > >
> > > >  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> > > >  {
> > > > @@ -411,25 +442,7 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> > > >                 vma->vm_file = file;
> > > >                 fput(realfile);
> > > >         } else {
> > > > -               /*
> > > > -                * In map_files_get_link() (fs/proc/base.c)
> > > > -                * we need to determine correct path from overlayfs.
> > > > -                * But real_mount(realfile->f_path.mnt) may be not
> > > > -                * equal to real_mount(file->f_path.mnt). In such case
> > > > -                * fdinfo of the same file which was opened from
> > > > -                * /proc/<pid>/map_files/... and "usual" path
> > > > -                * will show different mnt_id.
> > > > -                *
> > > > -                * We solve issue like in aufs by using additional
> > > > -                * field on struct vm_area_struct called "vm_prfile"
> > > > -                * which is used only for fdinfo/"printing" needs.
> > > > -                *
> > > > -                * See also mm/prfile.c
> > > > -                */
> > > >                 ovl_vm_prfile_set(vma, file);
> > > > -
> > > > -               /* Drop reference count from previous vm_file value */
> > > > -               fput(file);
> > > >         }
> > > >
> > > >         ovl_file_accessed(file);
> > > > --
> > > > 2.40.1
> > > >
> > > >
> > > > --
> > >
> > > Dear Andrea,
> > >
> > > I'm almost sure that Focal is not affected by this issue. This issue
> > > appeared after rebasing to a newer kernel version.
> > > Have you checked that Focal is affected?
> >
> > Hi Alex,
> >
> > I've just checked and focal doesn't seem to be affected (at least the
> > PoC doesn't trigger the problem).
> 
> Thanks!
> 
> >
> > However, looking at what we're doing here:
> >
> > static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> >                               struct file *file)
> > {
> >         get_file(file);
> >         vma->vm_prfile = file;
> > ...
> >
> > We just blindly overwrite vma->vm_prfile without releasing what could
> > have been potentially stored there, and maybe that's totally safe in
> > focal, but I'm not 100% sure that it's always safe.
> >
> > I'd rather do this:
> >
> >         get_file(file);
> >         swap(vma->vm_prfile, file);
> >         /* Drop reference count from previous file value */
> >         if (file)
> >                 fput(file);
> >
> > And in this way the code is also consistent with the other kernels.
> >
> > What do you think?
> 
> It's an interesting point. Seems like this can only be reproduced when
> one overlayfs is stacked with another one overlayfs.
> Usually, vma->vm_prfile is NULL when we enter the ovl_vm_prfile_set function.
> 
> If you don't mind, I'll take a close look at this place on the weekend
> and check everything. I know that this place is very-very tricky and
> we
> have made a few bugs right there before :) Overlayfs is very popular
> and I'm afraid that we can affect many users.
> 
> The same applies to the Jammy/Kinetic and other trees.
> 
> If you wanted to merge this really quick and this can't wait please
> tell me. I'll try to take a closer look at that closely today evening
> or tomorrow then.

I'm totally happy to wait, better to be 100% sure when we touch these
overlayfs SAUCE patches.

Side note: I've started to collect all the little test cases that are
scattered here and there across multiple git commits, bugs, etc. to see
if we can put together a proper test plan for all the custom changes
that we have applied to overlayfs (and shiftfs as well).

I don't think we have anything at the moment that is checking if this
stuff is working as expected for our needs, and that's quite scary,
because, as you mentioned, this stuff is affecting a lot of users.

Do you have any specific test case already that I could possibly
integrate in this potential "overlayfs" test suite?

Thanks,
-Andrea
Alexander Mikhalitsyn June 28, 2023, 3:01 p.m. UTC | #5
On Thu, Jun 22, 2023 at 2:56 PM Andrea Righi <andrea.righi@canonical.com> wrote:
>
> On Thu, Jun 22, 2023 at 02:47:03PM +0200, Aleksandr Mikhalitsyn wrote:
> > On Thu, Jun 22, 2023 at 2:20 PM Andrea Righi <andrea.righi@canonical.com> wrote:
> > >
> > > On Thu, Jun 22, 2023 at 12:14:26PM +0200, Aleksandr Mikhalitsyn wrote:
> > > > On Thu, Jun 22, 2023 at 11:38 AM Andrea Righi
> > > > <andrea.righi@canonical.com> wrote:
> > > > >
> > > > > BugLink: https://bugs.launchpad.net/bugs/2016398
> > > > >
> > > > > Opened files reported in /proc/pid/map_files can be shows with the wrong
> > > > > mount point using overlayfs with filesystem namspaces.
> > > > >
> > > > > This incorrect behavior is fixed:
> > > > >
> > > > >   UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files
> > > > >
> > > > > However, the fix introduced a new regression, the reference to the
> > > > > original file stored in vma->vm_prfile is not properly released when
> > > > > vma->vm_prfile is replaced with a new file.
> > > > >
> > > > > This can cause a reference counter unbalance, leading errors such as
> > > > > "target is busy" when trying to unmount overlayfs, even if the
> > > > > filesystem has not active reference.
> > > > >
> > > > > Fix by properly releasing the original file stored in vm_prfile.
> > > > >
> > > > > Fixes: 508fdae3f62dd ("UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files")
> > > > > Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
> > > > > ---
> > > > >  fs/overlayfs/file.c | 59 +++++++++++++++++++++++++++------------------
> > > > >  1 file changed, 36 insertions(+), 23 deletions(-)
> > > > >
> > > > > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > > > > index 366a4267d5f8..565010d2c9dd 100644
> > > > > --- a/fs/overlayfs/file.c
> > > > > +++ b/fs/overlayfs/file.c
> > > > > @@ -373,17 +373,48 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> > > > >         return ret;
> > > > >  }
> > > > >
> > > > > -/* handle vma->vm_prfile */
> > > > > +/*
> > > > > + * In map_files_get_link() (fs/proc/base.c)
> > > > > + * we need to determine correct path from overlayfs.
> > > > > + * But real_mount(realfile->f_path.mnt) may be not
> > > > > + * equal to real_mount(file->f_path.mnt). In such case
> > > > > + * fdinfo of the same file which was opened from
> > > > > + * /proc/<pid>/map_files/... and "usual" path
> > > > > + * will show different mnt_id.
> > > > > + *
> > > > > + * We solve issue like in aufs by using additional
> > > > > + * field on struct vm_area_struct called "vm_prfile"
> > > > > + * which is used only for fdinfo/"printing" needs.
> > > > > + *
> > > > > + * See also mm/prfile.c
> > > > > + */
> > > > > +#ifdef CONFIG_MMU
> > > > >  static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > > >                               struct file *file)
> > > > >  {
> > > > >         get_file(file);
> > > > > -       vma->vm_prfile = file;
> > > > > -#ifndef CONFIG_MMU
> > > > > +       swap(vma->vm_prfile, file);
> > > > > +       /* Drop reference count from previous file value */
> > > > > +       if (file)
> > > > > +               fput(file);
> > > > > +}
> > > > > +#else
> > > > > +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > > > +                             struct file *file)
> > > > > +{
> > > > > +       struct file *vm_region_file = file;
> > > > > +
> > > > >         get_file(file);
> > > > > -       vma->vm_region->vm_prfile = file;
> > > > > -#endif
> > > > > +       get_file(vm_region_file);
> > > > > +       swap(vma->vm_prfile, file);
> > > > > +       swap(vma->vm_region->vm_prfile, vm_region_file);
> > > > > +       /* Drop reference count from previous file values */
> > > > > +       if (file)
> > > > > +               fput(file);
> > > > > +       if (vm_region_file)
> > > > > +               fput(vm_region_file);
> > > > >  }
> > > > > +#endif
> > > > >
> > > > >  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> > > > >  {
> > > > > @@ -411,25 +442,7 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> > > > >                 vma->vm_file = file;
> > > > >                 fput(realfile);
> > > > >         } else {
> > > > > -               /*
> > > > > -                * In map_files_get_link() (fs/proc/base.c)
> > > > > -                * we need to determine correct path from overlayfs.
> > > > > -                * But real_mount(realfile->f_path.mnt) may be not
> > > > > -                * equal to real_mount(file->f_path.mnt). In such case
> > > > > -                * fdinfo of the same file which was opened from
> > > > > -                * /proc/<pid>/map_files/... and "usual" path
> > > > > -                * will show different mnt_id.
> > > > > -                *
> > > > > -                * We solve issue like in aufs by using additional
> > > > > -                * field on struct vm_area_struct called "vm_prfile"
> > > > > -                * which is used only for fdinfo/"printing" needs.
> > > > > -                *
> > > > > -                * See also mm/prfile.c
> > > > > -                */
> > > > >                 ovl_vm_prfile_set(vma, file);
> > > > > -
> > > > > -               /* Drop reference count from previous vm_file value */
> > > > > -               fput(file);
> > > > >         }
> > > > >
> > > > >         ovl_file_accessed(file);
> > > > > --
> > > > > 2.40.1
> > > > >
> > > > >
> > > > > --
> > > >
> > > > Dear Andrea,
> > > >
> > > > I'm almost sure that Focal is not affected by this issue. This issue
> > > > appeared after rebasing to a newer kernel version.
> > > > Have you checked that Focal is affected?
> > >
> > > Hi Alex,
> > >
> > > I've just checked and focal doesn't seem to be affected (at least the
> > > PoC doesn't trigger the problem).
> >
> > Thanks!
> >
> > >
> > > However, looking at what we're doing here:
> > >
> > > static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > >                               struct file *file)
> > > {
> > >         get_file(file);
> > >         vma->vm_prfile = file;
> > > ...
> > >
> > > We just blindly overwrite vma->vm_prfile without releasing what could
> > > have been potentially stored there, and maybe that's totally safe in
> > > focal, but I'm not 100% sure that it's always safe.
> > >
> > > I'd rather do this:
> > >
> > >         get_file(file);
> > >         swap(vma->vm_prfile, file);
> > >         /* Drop reference count from previous file value */
> > >         if (file)
> > >                 fput(file);
> > >
> > > And in this way the code is also consistent with the other kernels.
> > >
> > > What do you think?
> >
> > It's an interesting point. Seems like this can only be reproduced when
> > one overlayfs is stacked with another one overlayfs.
> > Usually, vma->vm_prfile is NULL when we enter the ovl_vm_prfile_set function.
> >
> > If you don't mind, I'll take a close look at this place on the weekend
> > and check everything. I know that this place is very-very tricky and
> > we
> > have made a few bugs right there before :) Overlayfs is very popular
> > and I'm afraid that we can affect many users.
> >
> > The same applies to the Jammy/Kinetic and other trees.
> >
> > If you wanted to merge this really quick and this can't wait please
> > tell me. I'll try to take a closer look at that closely today evening
> > or tomorrow then.

Dear Andrea,

>
> I'm totally happy to wait, better to be 100% sure when we touch these
> overlayfs SAUCE patches.

I've added a comment with the result of my investigation of this issue.
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2016398/comments/17

>
> Side note: I've started to collect all the little test cases that are
> scattered here and there across multiple git commits, bugs, etc. to see
> if we can put together a proper test plan for all the custom changes
> that we have applied to overlayfs (and shiftfs as well).
>
> I don't think we have anything at the moment that is checking if this
> stuff is working as expected for our needs, and that's quite scary,
> because, as you mentioned, this stuff is affecting a lot of users.
>
> Do you have any specific test case already that I could possibly
> integrate in this potential "overlayfs" test suite?

It's a very good idea!

First of all, this patch contains testcase in the description:
https://git.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux/+git/focal/commit/fs/overlayfs/file.c?h=master-next&id=28eab192cf0e37156fc41b36f06790d5ca984834

Another test that comes to my mind is to mount overlayfs on top of
shiftfs and do some basic files/directories operations.

Kind regards,
Alex

>
> Thanks,
> -Andrea
Alexander Mikhalitsyn June 28, 2023, 3:32 p.m. UTC | #6
On Thu, Jun 22, 2023 at 11:38 AM Andrea Righi
<andrea.righi@canonical.com> wrote:
>
> BugLink: https://bugs.launchpad.net/bugs/2016398
>
> Opened files reported in /proc/pid/map_files can be shows with the wrong
> mount point using overlayfs with filesystem namspaces.
>
> This incorrect behavior is fixed:
>
>   UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files
>
> However, the fix introduced a new regression, the reference to the
> original file stored in vma->vm_prfile is not properly released when
> vma->vm_prfile is replaced with a new file.
>
> This can cause a reference counter unbalance, leading errors such as
> "target is busy" when trying to unmount overlayfs, even if the
> filesystem has not active reference.
>
> Fix by properly releasing the original file stored in vm_prfile.
>
> Fixes: 508fdae3f62dd ("UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files")
> Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
> ---
>  fs/overlayfs/file.c | 59 +++++++++++++++++++++++++++------------------
>  1 file changed, 36 insertions(+), 23 deletions(-)
>
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index 366a4267d5f8..565010d2c9dd 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -373,17 +373,48 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
>         return ret;
>  }
>
> -/* handle vma->vm_prfile */
> +/*
> + * In map_files_get_link() (fs/proc/base.c)
> + * we need to determine correct path from overlayfs.
> + * But real_mount(realfile->f_path.mnt) may be not
> + * equal to real_mount(file->f_path.mnt). In such case
> + * fdinfo of the same file which was opened from
> + * /proc/<pid>/map_files/... and "usual" path
> + * will show different mnt_id.
> + *
> + * We solve issue like in aufs by using additional
> + * field on struct vm_area_struct called "vm_prfile"
> + * which is used only for fdinfo/"printing" needs.
> + *
> + * See also mm/prfile.c
> + */
> +#ifdef CONFIG_MMU
>  static void ovl_vm_prfile_set(struct vm_area_struct *vma,
>                               struct file *file)
>  {
>         get_file(file);
> -       vma->vm_prfile = file;
> -#ifndef CONFIG_MMU
> +       swap(vma->vm_prfile, file);
> +       /* Drop reference count from previous file value */
> +       if (file)
> +               fput(file);
> +}
> +#else
> +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> +                             struct file *file)
> +{
> +       struct file *vm_region_file = file;
> +
>         get_file(file);
> -       vma->vm_region->vm_prfile = file;
> -#endif
> +       get_file(vm_region_file);
> +       swap(vma->vm_prfile, file);
> +       swap(vma->vm_region->vm_prfile, vm_region_file);
> +       /* Drop reference count from previous file values */
> +       if (file)
> +               fput(file);
> +       if (vm_region_file)
> +               fput(vm_region_file);
>  }
> +#endif
>
>  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
>  {
> @@ -411,25 +442,7 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
>                 vma->vm_file = file;
>                 fput(realfile);
>         } else {
> -               /*
> -                * In map_files_get_link() (fs/proc/base.c)
> -                * we need to determine correct path from overlayfs.
> -                * But real_mount(realfile->f_path.mnt) may be not
> -                * equal to real_mount(file->f_path.mnt). In such case
> -                * fdinfo of the same file which was opened from
> -                * /proc/<pid>/map_files/... and "usual" path
> -                * will show different mnt_id.
> -                *
> -                * We solve issue like in aufs by using additional
> -                * field on struct vm_area_struct called "vm_prfile"
> -                * which is used only for fdinfo/"printing" needs.
> -                *
> -                * See also mm/prfile.c
> -                */
>                 ovl_vm_prfile_set(vma, file);
> -
> -               /* Drop reference count from previous vm_file value */
> -               fput(file);

I'm not sure that we need to touch this line because it is not from
the Ubuntu Sauce patch, but from the original Linux kernel tree.
Removing it can cause a leak.

>         }
>
>         ovl_file_accessed(file);
> --
> 2.40.1
>
>
> --
> kernel-team mailing list
> kernel-team@lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team
Andrea Righi June 29, 2023, 6:03 a.m. UTC | #7
On Wed, Jun 28, 2023 at 05:32:21PM +0200, Aleksandr Mikhalitsyn wrote:
> On Thu, Jun 22, 2023 at 11:38 AM Andrea Righi
> <andrea.righi@canonical.com> wrote:
> >
> > BugLink: https://bugs.launchpad.net/bugs/2016398
> >
> > Opened files reported in /proc/pid/map_files can be shows with the wrong
> > mount point using overlayfs with filesystem namspaces.
> >
> > This incorrect behavior is fixed:
> >
> >   UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files
> >
> > However, the fix introduced a new regression, the reference to the
> > original file stored in vma->vm_prfile is not properly released when
> > vma->vm_prfile is replaced with a new file.
> >
> > This can cause a reference counter unbalance, leading errors such as
> > "target is busy" when trying to unmount overlayfs, even if the
> > filesystem has not active reference.
> >
> > Fix by properly releasing the original file stored in vm_prfile.
> >
> > Fixes: 508fdae3f62dd ("UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files")
> > Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
> > ---
> >  fs/overlayfs/file.c | 59 +++++++++++++++++++++++++++------------------
> >  1 file changed, 36 insertions(+), 23 deletions(-)
> >
> > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > index 366a4267d5f8..565010d2c9dd 100644
> > --- a/fs/overlayfs/file.c
> > +++ b/fs/overlayfs/file.c
> > @@ -373,17 +373,48 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> >         return ret;
> >  }
> >
> > -/* handle vma->vm_prfile */
> > +/*
> > + * In map_files_get_link() (fs/proc/base.c)
> > + * we need to determine correct path from overlayfs.
> > + * But real_mount(realfile->f_path.mnt) may be not
> > + * equal to real_mount(file->f_path.mnt). In such case
> > + * fdinfo of the same file which was opened from
> > + * /proc/<pid>/map_files/... and "usual" path
> > + * will show different mnt_id.
> > + *
> > + * We solve issue like in aufs by using additional
> > + * field on struct vm_area_struct called "vm_prfile"
> > + * which is used only for fdinfo/"printing" needs.
> > + *
> > + * See also mm/prfile.c
> > + */
> > +#ifdef CONFIG_MMU
> >  static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> >                               struct file *file)
> >  {
> >         get_file(file);
> > -       vma->vm_prfile = file;
> > -#ifndef CONFIG_MMU
> > +       swap(vma->vm_prfile, file);
> > +       /* Drop reference count from previous file value */
> > +       if (file)
> > +               fput(file);
> > +}
> > +#else
> > +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > +                             struct file *file)
> > +{
> > +       struct file *vm_region_file = file;
> > +
> >         get_file(file);
> > -       vma->vm_region->vm_prfile = file;
> > -#endif
> > +       get_file(vm_region_file);
> > +       swap(vma->vm_prfile, file);
> > +       swap(vma->vm_region->vm_prfile, vm_region_file);
> > +       /* Drop reference count from previous file values */
> > +       if (file)
> > +               fput(file);
> > +       if (vm_region_file)
> > +               fput(vm_region_file);
> >  }
> > +#endif
> >
> >  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> >  {
> > @@ -411,25 +442,7 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> >                 vma->vm_file = file;
> >                 fput(realfile);
> >         } else {
> > -               /*
> > -                * In map_files_get_link() (fs/proc/base.c)
> > -                * we need to determine correct path from overlayfs.
> > -                * But real_mount(realfile->f_path.mnt) may be not
> > -                * equal to real_mount(file->f_path.mnt). In such case
> > -                * fdinfo of the same file which was opened from
> > -                * /proc/<pid>/map_files/... and "usual" path
> > -                * will show different mnt_id.
> > -                *
> > -                * We solve issue like in aufs by using additional
> > -                * field on struct vm_area_struct called "vm_prfile"
> > -                * which is used only for fdinfo/"printing" needs.
> > -                *
> > -                * See also mm/prfile.c
> > -                */
> >                 ovl_vm_prfile_set(vma, file);
> > -
> > -               /* Drop reference count from previous vm_file value */
> > -               fput(file);
> 
> I'm not sure that we need to touch this line because it is not from
> the Ubuntu Sauce patch, but from the original Linux kernel tree.
> Removing it can cause a leak.

That line is not from the SAUCE patch, but before my change
ovl_vm_prfile_set() wasn't doing an fput(file). Now that
ovl_vm_prfile_set() is also taking care of doing fput(file), I think we
need to remove that line, otherwise we would release the file twice.

-Andrea
Alexander Mikhalitsyn June 29, 2023, 9:33 a.m. UTC | #8
On Thu, Jun 29, 2023 at 8:03 AM Andrea Righi <andrea.righi@canonical.com> wrote:
>
> On Wed, Jun 28, 2023 at 05:32:21PM +0200, Aleksandr Mikhalitsyn wrote:
> > On Thu, Jun 22, 2023 at 11:38 AM Andrea Righi
> > <andrea.righi@canonical.com> wrote:
> > >
> > > BugLink: https://bugs.launchpad.net/bugs/2016398
> > >
> > > Opened files reported in /proc/pid/map_files can be shows with the wrong
> > > mount point using overlayfs with filesystem namspaces.
> > >
> > > This incorrect behavior is fixed:
> > >
> > >   UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files
> > >
> > > However, the fix introduced a new regression, the reference to the
> > > original file stored in vma->vm_prfile is not properly released when
> > > vma->vm_prfile is replaced with a new file.
> > >
> > > This can cause a reference counter unbalance, leading errors such as
> > > "target is busy" when trying to unmount overlayfs, even if the
> > > filesystem has not active reference.
> > >
> > > Fix by properly releasing the original file stored in vm_prfile.
> > >
> > > Fixes: 508fdae3f62dd ("UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files")
> > > Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
> > > ---
> > >  fs/overlayfs/file.c | 59 +++++++++++++++++++++++++++------------------
> > >  1 file changed, 36 insertions(+), 23 deletions(-)
> > >
> > > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > > index 366a4267d5f8..565010d2c9dd 100644
> > > --- a/fs/overlayfs/file.c
> > > +++ b/fs/overlayfs/file.c
> > > @@ -373,17 +373,48 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> > >         return ret;
> > >  }
> > >
> > > -/* handle vma->vm_prfile */
> > > +/*
> > > + * In map_files_get_link() (fs/proc/base.c)
> > > + * we need to determine correct path from overlayfs.
> > > + * But real_mount(realfile->f_path.mnt) may be not
> > > + * equal to real_mount(file->f_path.mnt). In such case
> > > + * fdinfo of the same file which was opened from
> > > + * /proc/<pid>/map_files/... and "usual" path
> > > + * will show different mnt_id.
> > > + *
> > > + * We solve issue like in aufs by using additional
> > > + * field on struct vm_area_struct called "vm_prfile"
> > > + * which is used only for fdinfo/"printing" needs.
> > > + *
> > > + * See also mm/prfile.c
> > > + */
> > > +#ifdef CONFIG_MMU
> > >  static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > >                               struct file *file)
> > >  {
> > >         get_file(file);
> > > -       vma->vm_prfile = file;
> > > -#ifndef CONFIG_MMU
> > > +       swap(vma->vm_prfile, file);
> > > +       /* Drop reference count from previous file value */
> > > +       if (file)
> > > +               fput(file);
> > > +}
> > > +#else
> > > +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > +                             struct file *file)
> > > +{
> > > +       struct file *vm_region_file = file;
> > > +
> > >         get_file(file);
> > > -       vma->vm_region->vm_prfile = file;
> > > -#endif
> > > +       get_file(vm_region_file);
> > > +       swap(vma->vm_prfile, file);
> > > +       swap(vma->vm_region->vm_prfile, vm_region_file);
> > > +       /* Drop reference count from previous file values */
> > > +       if (file)
> > > +               fput(file);
> > > +       if (vm_region_file)
> > > +               fput(vm_region_file);
> > >  }
> > > +#endif
> > >
> > >  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> > >  {
> > > @@ -411,25 +442,7 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> > >                 vma->vm_file = file;
> > >                 fput(realfile);
> > >         } else {
> > > -               /*
> > > -                * In map_files_get_link() (fs/proc/base.c)
> > > -                * we need to determine correct path from overlayfs.
> > > -                * But real_mount(realfile->f_path.mnt) may be not
> > > -                * equal to real_mount(file->f_path.mnt). In such case
> > > -                * fdinfo of the same file which was opened from
> > > -                * /proc/<pid>/map_files/... and "usual" path
> > > -                * will show different mnt_id.
> > > -                *
> > > -                * We solve issue like in aufs by using additional
> > > -                * field on struct vm_area_struct called "vm_prfile"
> > > -                * which is used only for fdinfo/"printing" needs.
> > > -                *
> > > -                * See also mm/prfile.c
> > > -                */
> > >                 ovl_vm_prfile_set(vma, file);
> > > -
> > > -               /* Drop reference count from previous vm_file value */
> > > -               fput(file);
> >
> > I'm not sure that we need to touch this line because it is not from
> > the Ubuntu Sauce patch, but from the original Linux kernel tree.
> > Removing it can cause a leak.
>
> That line is not from the SAUCE patch, but before my change
> ovl_vm_prfile_set() wasn't doing an fput(file). Now that
> ovl_vm_prfile_set() is also taking care of doing fput(file), I think we
> need to remove that line, otherwise we would release the file twice.

fput inside ovl_vm_prfile_set releases the struct file that was in
vma->vm_prfile (if it is not NULL),
but this fput(file) is not about that. It releases and an old struct
file reference that was in vma->vm_file
when ovl_mmap() was called. This reference was taken in mmap_region()
just before the call_mmap() call.
So, as in ovl_mmap we replace vma->vm_file with a new one, we need to
release an old reference.

Please note, that vma->vm_file != vma->vm_prfile.

Kind regards,
Alex

>
> -Andrea
Andrea Righi June 29, 2023, 9:40 a.m. UTC | #9
On Thu, Jun 29, 2023 at 11:33:50AM +0200, Aleksandr Mikhalitsyn wrote:
> On Thu, Jun 29, 2023 at 8:03 AM Andrea Righi <andrea.righi@canonical.com> wrote:
> >
> > On Wed, Jun 28, 2023 at 05:32:21PM +0200, Aleksandr Mikhalitsyn wrote:
> > > On Thu, Jun 22, 2023 at 11:38 AM Andrea Righi
> > > <andrea.righi@canonical.com> wrote:
> > > >
> > > > BugLink: https://bugs.launchpad.net/bugs/2016398
> > > >
> > > > Opened files reported in /proc/pid/map_files can be shows with the wrong
> > > > mount point using overlayfs with filesystem namspaces.
> > > >
> > > > This incorrect behavior is fixed:
> > > >
> > > >   UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files
> > > >
> > > > However, the fix introduced a new regression, the reference to the
> > > > original file stored in vma->vm_prfile is not properly released when
> > > > vma->vm_prfile is replaced with a new file.
> > > >
> > > > This can cause a reference counter unbalance, leading errors such as
> > > > "target is busy" when trying to unmount overlayfs, even if the
> > > > filesystem has not active reference.
> > > >
> > > > Fix by properly releasing the original file stored in vm_prfile.
> > > >
> > > > Fixes: 508fdae3f62dd ("UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files")
> > > > Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
> > > > ---
> > > >  fs/overlayfs/file.c | 59 +++++++++++++++++++++++++++------------------
> > > >  1 file changed, 36 insertions(+), 23 deletions(-)
> > > >
> > > > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > > > index 366a4267d5f8..565010d2c9dd 100644
> > > > --- a/fs/overlayfs/file.c
> > > > +++ b/fs/overlayfs/file.c
> > > > @@ -373,17 +373,48 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> > > >         return ret;
> > > >  }
> > > >
> > > > -/* handle vma->vm_prfile */
> > > > +/*
> > > > + * In map_files_get_link() (fs/proc/base.c)
> > > > + * we need to determine correct path from overlayfs.
> > > > + * But real_mount(realfile->f_path.mnt) may be not
> > > > + * equal to real_mount(file->f_path.mnt). In such case
> > > > + * fdinfo of the same file which was opened from
> > > > + * /proc/<pid>/map_files/... and "usual" path
> > > > + * will show different mnt_id.
> > > > + *
> > > > + * We solve issue like in aufs by using additional
> > > > + * field on struct vm_area_struct called "vm_prfile"
> > > > + * which is used only for fdinfo/"printing" needs.
> > > > + *
> > > > + * See also mm/prfile.c
> > > > + */
> > > > +#ifdef CONFIG_MMU
> > > >  static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > >                               struct file *file)
> > > >  {
> > > >         get_file(file);
> > > > -       vma->vm_prfile = file;
> > > > -#ifndef CONFIG_MMU
> > > > +       swap(vma->vm_prfile, file);
> > > > +       /* Drop reference count from previous file value */
> > > > +       if (file)
> > > > +               fput(file);
> > > > +}
> > > > +#else
> > > > +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > > +                             struct file *file)
> > > > +{
> > > > +       struct file *vm_region_file = file;
> > > > +
> > > >         get_file(file);
> > > > -       vma->vm_region->vm_prfile = file;
> > > > -#endif
> > > > +       get_file(vm_region_file);
> > > > +       swap(vma->vm_prfile, file);
> > > > +       swap(vma->vm_region->vm_prfile, vm_region_file);
> > > > +       /* Drop reference count from previous file values */
> > > > +       if (file)
> > > > +               fput(file);
> > > > +       if (vm_region_file)
> > > > +               fput(vm_region_file);
> > > >  }
> > > > +#endif
> > > >
> > > >  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> > > >  {
> > > > @@ -411,25 +442,7 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> > > >                 vma->vm_file = file;
> > > >                 fput(realfile);
> > > >         } else {
> > > > -               /*
> > > > -                * In map_files_get_link() (fs/proc/base.c)
> > > > -                * we need to determine correct path from overlayfs.
> > > > -                * But real_mount(realfile->f_path.mnt) may be not
> > > > -                * equal to real_mount(file->f_path.mnt). In such case
> > > > -                * fdinfo of the same file which was opened from
> > > > -                * /proc/<pid>/map_files/... and "usual" path
> > > > -                * will show different mnt_id.
> > > > -                *
> > > > -                * We solve issue like in aufs by using additional
> > > > -                * field on struct vm_area_struct called "vm_prfile"
> > > > -                * which is used only for fdinfo/"printing" needs.
> > > > -                *
> > > > -                * See also mm/prfile.c
> > > > -                */
> > > >                 ovl_vm_prfile_set(vma, file);
> > > > -
> > > > -               /* Drop reference count from previous vm_file value */
> > > > -               fput(file);
> > >
> > > I'm not sure that we need to touch this line because it is not from
> > > the Ubuntu Sauce patch, but from the original Linux kernel tree.
> > > Removing it can cause a leak.
> >
> > That line is not from the SAUCE patch, but before my change
> > ovl_vm_prfile_set() wasn't doing an fput(file). Now that
> > ovl_vm_prfile_set() is also taking care of doing fput(file), I think we
> > need to remove that line, otherwise we would release the file twice.
> 
> fput inside ovl_vm_prfile_set releases the struct file that was in
> vma->vm_prfile (if it is not NULL),
> but this fput(file) is not about that. It releases and an old struct
> file reference that was in vma->vm_file
> when ovl_mmap() was called. This reference was taken in mmap_region()
> just before the call_mmap() call.
> So, as in ovl_mmap we replace vma->vm_file with a new one, we need to
> release an old reference.

Oh yes, you are absolutely right, sorry I looked at the code too
quickly.  Nice catch! I'll send a new version for the focal patch.

Thanks!
-Andrea

> 
> Please note, that vma->vm_file != vma->vm_prfile.
> 
> Kind regards,
> Alex
> 
> >
> > -Andrea
Alexander Mikhalitsyn June 29, 2023, 9:43 a.m. UTC | #10
On Thu, Jun 29, 2023 at 11:40 AM Andrea Righi
<andrea.righi@canonical.com> wrote:
>
> On Thu, Jun 29, 2023 at 11:33:50AM +0200, Aleksandr Mikhalitsyn wrote:
> > On Thu, Jun 29, 2023 at 8:03 AM Andrea Righi <andrea.righi@canonical.com> wrote:
> > >
> > > On Wed, Jun 28, 2023 at 05:32:21PM +0200, Aleksandr Mikhalitsyn wrote:
> > > > On Thu, Jun 22, 2023 at 11:38 AM Andrea Righi
> > > > <andrea.righi@canonical.com> wrote:
> > > > >
> > > > > BugLink: https://bugs.launchpad.net/bugs/2016398
> > > > >
> > > > > Opened files reported in /proc/pid/map_files can be shows with the wrong
> > > > > mount point using overlayfs with filesystem namspaces.
> > > > >
> > > > > This incorrect behavior is fixed:
> > > > >
> > > > >   UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files
> > > > >
> > > > > However, the fix introduced a new regression, the reference to the
> > > > > original file stored in vma->vm_prfile is not properly released when
> > > > > vma->vm_prfile is replaced with a new file.
> > > > >
> > > > > This can cause a reference counter unbalance, leading errors such as
> > > > > "target is busy" when trying to unmount overlayfs, even if the
> > > > > filesystem has not active reference.
> > > > >
> > > > > Fix by properly releasing the original file stored in vm_prfile.
> > > > >
> > > > > Fixes: 508fdae3f62dd ("UBUNTU: SAUCE: overlayfs: fix incorrect mnt_id of files opened from map_files")
> > > > > Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
> > > > > ---
> > > > >  fs/overlayfs/file.c | 59 +++++++++++++++++++++++++++------------------
> > > > >  1 file changed, 36 insertions(+), 23 deletions(-)
> > > > >
> > > > > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> > > > > index 366a4267d5f8..565010d2c9dd 100644
> > > > > --- a/fs/overlayfs/file.c
> > > > > +++ b/fs/overlayfs/file.c
> > > > > @@ -373,17 +373,48 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> > > > >         return ret;
> > > > >  }
> > > > >
> > > > > -/* handle vma->vm_prfile */
> > > > > +/*
> > > > > + * In map_files_get_link() (fs/proc/base.c)
> > > > > + * we need to determine correct path from overlayfs.
> > > > > + * But real_mount(realfile->f_path.mnt) may be not
> > > > > + * equal to real_mount(file->f_path.mnt). In such case
> > > > > + * fdinfo of the same file which was opened from
> > > > > + * /proc/<pid>/map_files/... and "usual" path
> > > > > + * will show different mnt_id.
> > > > > + *
> > > > > + * We solve issue like in aufs by using additional
> > > > > + * field on struct vm_area_struct called "vm_prfile"
> > > > > + * which is used only for fdinfo/"printing" needs.
> > > > > + *
> > > > > + * See also mm/prfile.c
> > > > > + */
> > > > > +#ifdef CONFIG_MMU
> > > > >  static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > > >                               struct file *file)
> > > > >  {
> > > > >         get_file(file);
> > > > > -       vma->vm_prfile = file;
> > > > > -#ifndef CONFIG_MMU
> > > > > +       swap(vma->vm_prfile, file);
> > > > > +       /* Drop reference count from previous file value */
> > > > > +       if (file)
> > > > > +               fput(file);
> > > > > +}
> > > > > +#else
> > > > > +static void ovl_vm_prfile_set(struct vm_area_struct *vma,
> > > > > +                             struct file *file)
> > > > > +{
> > > > > +       struct file *vm_region_file = file;
> > > > > +
> > > > >         get_file(file);
> > > > > -       vma->vm_region->vm_prfile = file;
> > > > > -#endif
> > > > > +       get_file(vm_region_file);
> > > > > +       swap(vma->vm_prfile, file);
> > > > > +       swap(vma->vm_region->vm_prfile, vm_region_file);
> > > > > +       /* Drop reference count from previous file values */
> > > > > +       if (file)
> > > > > +               fput(file);
> > > > > +       if (vm_region_file)
> > > > > +               fput(vm_region_file);
> > > > >  }
> > > > > +#endif
> > > > >
> > > > >  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> > > > >  {
> > > > > @@ -411,25 +442,7 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> > > > >                 vma->vm_file = file;
> > > > >                 fput(realfile);
> > > > >         } else {
> > > > > -               /*
> > > > > -                * In map_files_get_link() (fs/proc/base.c)
> > > > > -                * we need to determine correct path from overlayfs.
> > > > > -                * But real_mount(realfile->f_path.mnt) may be not
> > > > > -                * equal to real_mount(file->f_path.mnt). In such case
> > > > > -                * fdinfo of the same file which was opened from
> > > > > -                * /proc/<pid>/map_files/... and "usual" path
> > > > > -                * will show different mnt_id.
> > > > > -                *
> > > > > -                * We solve issue like in aufs by using additional
> > > > > -                * field on struct vm_area_struct called "vm_prfile"
> > > > > -                * which is used only for fdinfo/"printing" needs.
> > > > > -                *
> > > > > -                * See also mm/prfile.c
> > > > > -                */
> > > > >                 ovl_vm_prfile_set(vma, file);
> > > > > -
> > > > > -               /* Drop reference count from previous vm_file value */
> > > > > -               fput(file);
> > > >
> > > > I'm not sure that we need to touch this line because it is not from
> > > > the Ubuntu Sauce patch, but from the original Linux kernel tree.
> > > > Removing it can cause a leak.
> > >
> > > That line is not from the SAUCE patch, but before my change
> > > ovl_vm_prfile_set() wasn't doing an fput(file). Now that
> > > ovl_vm_prfile_set() is also taking care of doing fput(file), I think we
> > > need to remove that line, otherwise we would release the file twice.
> >
> > fput inside ovl_vm_prfile_set releases the struct file that was in
> > vma->vm_prfile (if it is not NULL),
> > but this fput(file) is not about that. It releases and an old struct
> > file reference that was in vma->vm_file
> > when ovl_mmap() was called. This reference was taken in mmap_region()
> > just before the call_mmap() call.
> > So, as in ovl_mmap we replace vma->vm_file with a new one, we need to
> > release an old reference.
>
> Oh yes, you are absolutely right, sorry I looked at the code too
> quickly.  Nice catch! I'll send a new version for the focal patch.

;-)

It's too easy to make a mistake in hard cases like this one.

Kind regards,
Alex

>
> Thanks!
> -Andrea
>
> >
> > Please note, that vma->vm_file != vma->vm_prfile.
> >
> > Kind regards,
> > Alex
> >
> > >
> > > -Andrea
diff mbox series

Patch

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 366a4267d5f8..565010d2c9dd 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -373,17 +373,48 @@  static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return ret;
 }
 
-/* handle vma->vm_prfile */
+/*
+ * In map_files_get_link() (fs/proc/base.c)
+ * we need to determine correct path from overlayfs.
+ * But real_mount(realfile->f_path.mnt) may be not
+ * equal to real_mount(file->f_path.mnt). In such case
+ * fdinfo of the same file which was opened from
+ * /proc/<pid>/map_files/... and "usual" path
+ * will show different mnt_id.
+ *
+ * We solve issue like in aufs by using additional
+ * field on struct vm_area_struct called "vm_prfile"
+ * which is used only for fdinfo/"printing" needs.
+ *
+ * See also mm/prfile.c
+ */
+#ifdef CONFIG_MMU
 static void ovl_vm_prfile_set(struct vm_area_struct *vma,
 			      struct file *file)
 {
 	get_file(file);
-	vma->vm_prfile = file;
-#ifndef CONFIG_MMU
+	swap(vma->vm_prfile, file);
+	/* Drop reference count from previous file value */
+	if (file)
+		fput(file);
+}
+#else
+static void ovl_vm_prfile_set(struct vm_area_struct *vma,
+			      struct file *file)
+{
+	struct file *vm_region_file = file;
+
 	get_file(file);
-	vma->vm_region->vm_prfile = file;
-#endif
+	get_file(vm_region_file);
+	swap(vma->vm_prfile, file);
+	swap(vma->vm_region->vm_prfile, vm_region_file);
+	/* Drop reference count from previous file values */
+	if (file)
+		fput(file);
+	if (vm_region_file)
+		fput(vm_region_file);
 }
+#endif
 
 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 {
@@ -411,25 +442,7 @@  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 		vma->vm_file = file;
 		fput(realfile);
 	} else {
-		/*
-		 * In map_files_get_link() (fs/proc/base.c)
-		 * we need to determine correct path from overlayfs.
-		 * But real_mount(realfile->f_path.mnt) may be not
-		 * equal to real_mount(file->f_path.mnt). In such case
-		 * fdinfo of the same file which was opened from
-		 * /proc/<pid>/map_files/... and "usual" path
-		 * will show different mnt_id.
-		 *
-		 * We solve issue like in aufs by using additional
-		 * field on struct vm_area_struct called "vm_prfile"
-		 * which is used only for fdinfo/"printing" needs.
-		 *
-		 * See also mm/prfile.c
-		 */
 		ovl_vm_prfile_set(vma, file);
-
-		/* Drop reference count from previous vm_file value */
-		fput(file);
 	}
 
 	ovl_file_accessed(file);