diff mbox series

[1/2] fs/dcache: introduce d_alloc_parallel_check_existing

Message ID 20240705062621.630604-2-eugen.hristev@collabora.com
State New
Headers show
Series fs/dcache: fix cache inconsistency on case-insensitive lookups | expand

Commit Message

Eugen Hristev July 5, 2024, 6:26 a.m. UTC
d_alloc_parallel currently looks for entries that match the name being
added and return them if found.
However if d_alloc_parallel is being called during the process of adding
a new entry (that becomes in_lookup), the same entry is found by
d_alloc_parallel in the in_lookup_hash and d_alloc_parallel will wait
forever for it to stop being in lookup.
To avoid this, it makes sense to check for an entry being currently
added (e.g. by d_add_ci from a lookup func, like xfs is doing) and if this
exact match is found, return the entry.
This way, to add a new entry , as xfs is doing, is the following flow:
_lookup_slow -> d_alloc_parallel -> entry is being created -> xfs_lookup ->
d_add_ci -> d_alloc_parallel_check_existing(entry created before) ->
d_splice_alias.
The initial entry stops being in_lookup after d_splice_alias finishes, and
it's returned to d_add_ci by d_alloc_parallel_check_existing.
Without d_alloc_parallel_check_existing, d_alloc_parallel would be called
instead and wait forever for the entry to stop being in lookup, as the
iteration through the in_lookup_hash matches the entry.
Currently XFS does not hang because it creates another entry in the second
call of d_alloc_parallel if the name differs by case as the hashing and
comparison functions used by XFS are not case insensitive.

Signed-off-by: Eugen Hristev <eugen.hristev@collabora.com>
---
 fs/dcache.c            | 29 +++++++++++++++++++++++------
 include/linux/dcache.h |  4 ++++
 2 files changed, 27 insertions(+), 6 deletions(-)

Comments

Gabriel Krisman Bertazi Aug. 20, 2024, 8:16 p.m. UTC | #1
Eugen Hristev <eugen.hristev@collabora.com> writes:

> d_alloc_parallel currently looks for entries that match the name being
> added and return them if found.
> However if d_alloc_parallel is being called during the process of adding
> a new entry (that becomes in_lookup), the same entry is found by
> d_alloc_parallel in the in_lookup_hash and d_alloc_parallel will wait
> forever for it to stop being in lookup.
> To avoid this, it makes sense to check for an entry being currently
> added (e.g. by d_add_ci from a lookup func, like xfs is doing) and if this
> exact match is found, return the entry.
> This way, to add a new entry , as xfs is doing, is the following flow:
> _lookup_slow -> d_alloc_parallel -> entry is being created -> xfs_lookup ->
> d_add_ci -> d_alloc_parallel_check_existing(entry created before) ->
> d_splice_alias.

Hi Eugen,

I have a hard time understanding what xfs has anything to do with this.
xfs already users d_add_ci without problems.  The issue is that
ext4/f2fs have case-insensitive d_compare/d_hash functions, so they will
find the dentry-under-lookup itself here. Xfs doesn't have that problem
at all because it doesn't try to match case-inexact names in the dcache.

> The initial entry stops being in_lookup after d_splice_alias finishes, and
> it's returned to d_add_ci by d_alloc_parallel_check_existing.
> Without d_alloc_parallel_check_existing, d_alloc_parallel would be called
> instead and wait forever for the entry to stop being in lookup, as the
> iteration through the in_lookup_hash matches the entry.
> Currently XFS does not hang because it creates another entry in the second
> call of d_alloc_parallel if the name differs by case as the hashing and
> comparison functions used by XFS are not case insensitive.
>
> Signed-off-by: Eugen Hristev <eugen.hristev@collabora.com>
> ---
>  fs/dcache.c            | 29 +++++++++++++++++++++++------
>  include/linux/dcache.h |  4 ++++
>  2 files changed, 27 insertions(+), 6 deletions(-)
>
> diff --git a/fs/dcache.c b/fs/dcache.c
> index a0a944fd3a1c..459a3d8b8bdb 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -2049,8 +2049,9 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
>  		return found;
>  	}
>  	if (d_in_lookup(dentry)) {
> -		found = d_alloc_parallel(dentry->d_parent, name,
> -					dentry->d_wait);
> +		found = d_alloc_parallel_check_existing(dentry,
> +							dentry->d_parent, name,
> +							dentry->d_wait);
>  		if (IS_ERR(found) || !d_in_lookup(found)) {
>  			iput(inode);
>  			return found;
> @@ -2452,9 +2453,10 @@ static void d_wait_lookup(struct dentry *dentry)
>  	}
>  }
>  
> -struct dentry *d_alloc_parallel(struct dentry *parent,
> -				const struct qstr *name,
> -				wait_queue_head_t *wq)
> +struct dentry *d_alloc_parallel_check_existing(struct dentry *d_check,
> +					       struct dentry *parent,
> +					       const struct qstr *name,
> +					       wait_queue_head_t *wq)
>  {
>  	unsigned int hash = name->hash;
>  	struct hlist_bl_head *b = in_lookup_hash(parent, hash);
> @@ -2523,6 +2525,14 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
>  		}
>  
>  		rcu_read_unlock();
> +
> +		/* if the entry we found is the same as the original we
> +		 * are checking against, then return it
> +		 */
> +		if (d_check == dentry) {
> +			dput(new);
> +			return dentry;
> +		}

The point of the patchset is to install a dentry with the disk-name in
the dcache if the name isn't an exact match to the name of the
dentry-under-lookup.  But, since you return the same
dentry-under-lookup, d_add_ci will just splice that dentry into the
cache - which is exactly the same as just doing d_splice_alias(dentry) at
the end of ->d_lookup() like we currently do, no?  Shreeya's idea in
that original patchset was to return a new dentry with the new name.

>  		/*
>  		 * somebody is likely to be still doing lookup for it;
>  		 * wait for them to finish
> @@ -2560,8 +2570,15 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
>  	dput(dentry);
>  	goto retry;
>  }
> -EXPORT_SYMBOL(d_alloc_parallel);
> +EXPORT_SYMBOL(d_alloc_parallel_check_existing);
>  
> +struct dentry *d_alloc_parallel(struct dentry *parent,
> +				const struct qstr *name,
> +				wait_queue_head_t *wq)
> +{
> +	return d_alloc_parallel_check_existing(NULL, parent, name, wq);
> +}
> +EXPORT_SYMBOL(d_alloc_parallel);
>  /*
>   * - Unhash the dentry
>   * - Retrieve and clear the waitqueue head in dentry
> diff --git a/include/linux/dcache.h b/include/linux/dcache.h
> index bf53e3894aae..6eb21a518cb0 100644
> --- a/include/linux/dcache.h
> +++ b/include/linux/dcache.h
> @@ -232,6 +232,10 @@ extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
>  extern struct dentry * d_alloc_anon(struct super_block *);
>  extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
>  					wait_queue_head_t *);
> +extern struct dentry * d_alloc_parallel_check_existing(struct dentry *,
> +						       struct dentry *,
> +						       const struct qstr *,
> +						       wait_queue_head_t *);
>  extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
>  extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
>  extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent,
Eugen Hristev Aug. 21, 2024, 9:10 a.m. UTC | #2
On 8/20/24 23:16, Gabriel Krisman Bertazi wrote:
> Eugen Hristev <eugen.hristev@collabora.com> writes:
> 
>> d_alloc_parallel currently looks for entries that match the name being
>> added and return them if found.
>> However if d_alloc_parallel is being called during the process of adding
>> a new entry (that becomes in_lookup), the same entry is found by
>> d_alloc_parallel in the in_lookup_hash and d_alloc_parallel will wait
>> forever for it to stop being in lookup.
>> To avoid this, it makes sense to check for an entry being currently
>> added (e.g. by d_add_ci from a lookup func, like xfs is doing) and if this
>> exact match is found, return the entry.
>> This way, to add a new entry , as xfs is doing, is the following flow:
>> _lookup_slow -> d_alloc_parallel -> entry is being created -> xfs_lookup ->
>> d_add_ci -> d_alloc_parallel_check_existing(entry created before) ->
>> d_splice_alias.
> 
> Hi Eugen,

Hello Krisman,

> 
> I have a hard time understanding what xfs has anything to do with this.

It has because xfs has been given as an example a few times about how a FS
implementation should use d_add_ci.

> xfs already users d_add_ci without problems.  The issue is that
> ext4/f2fs have case-insensitive d_compare/d_hash functions, so they will
> find the dentry-under-lookup itself here. Xfs doesn't have that problem
> at all because it doesn't try to match case-inexact names in the dcache.

That's right. So xfs cannot be given as an example, as it does not make
case-inexact dentries and lookup.

> 
>> The initial entry stops being in_lookup after d_splice_alias finishes, and
>> it's returned to d_add_ci by d_alloc_parallel_check_existing.
>> Without d_alloc_parallel_check_existing, d_alloc_parallel would be called
>> instead and wait forever for the entry to stop being in lookup, as the
>> iteration through the in_lookup_hash matches the entry.
>> Currently XFS does not hang because it creates another entry in the second
>> call of d_alloc_parallel if the name differs by case as the hashing and
>> comparison functions used by XFS are not case insensitive.
>>
>> Signed-off-by: Eugen Hristev <eugen.hristev@collabora.com>
>> ---
>>  fs/dcache.c            | 29 +++++++++++++++++++++++------
>>  include/linux/dcache.h |  4 ++++
>>  2 files changed, 27 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/dcache.c b/fs/dcache.c
>> index a0a944fd3a1c..459a3d8b8bdb 100644
>> --- a/fs/dcache.c
>> +++ b/fs/dcache.c
>> @@ -2049,8 +2049,9 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
>>  		return found;
>>  	}
>>  	if (d_in_lookup(dentry)) {
>> -		found = d_alloc_parallel(dentry->d_parent, name,
>> -					dentry->d_wait);
>> +		found = d_alloc_parallel_check_existing(dentry,
>> +							dentry->d_parent, name,
>> +							dentry->d_wait);
>>  		if (IS_ERR(found) || !d_in_lookup(found)) {
>>  			iput(inode);
>>  			return found;
>> @@ -2452,9 +2453,10 @@ static void d_wait_lookup(struct dentry *dentry)
>>  	}
>>  }
>>  
>> -struct dentry *d_alloc_parallel(struct dentry *parent,
>> -				const struct qstr *name,
>> -				wait_queue_head_t *wq)
>> +struct dentry *d_alloc_parallel_check_existing(struct dentry *d_check,
>> +					       struct dentry *parent,
>> +					       const struct qstr *name,
>> +					       wait_queue_head_t *wq)
>>  {
>>  	unsigned int hash = name->hash;
>>  	struct hlist_bl_head *b = in_lookup_hash(parent, hash);
>> @@ -2523,6 +2525,14 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
>>  		}
>>  
>>  		rcu_read_unlock();
>> +
>> +		/* if the entry we found is the same as the original we
>> +		 * are checking against, then return it
>> +		 */
>> +		if (d_check == dentry) {
>> +			dput(new);
>> +			return dentry;
>> +		}
> 
> The point of the patchset is to install a dentry with the disk-name in
> the dcache if the name isn't an exact match to the name of the
> dentry-under-lookup.  But, since you return the same
> dentry-under-lookup, d_add_ci will just splice that dentry into the
> cache - which is exactly the same as just doing d_splice_alias(dentry) at
> the end of ->d_lookup() like we currently do, no?  Shreeya's idea in
> that original patchset was to return a new dentry with the new name.

Yes, but we cannot add another dentry for the same file with a different case.
That would break everything about dentry lookups, etc.
We need to have the one dentry in the cache which use the right case. Regardless of
the case of the lookup.

As Al Viro said here :
https://lore.kernel.org/lkml/YVmyYP25kgGq9uEy@zeniv-ca.linux.org.uk/
we cannot have parallel lookups for names that would compare as equals (two
different dentries for the same file with different case).

So yes, I return the same dentry-under-lookup, because that's the purpose of that
search, return it, have it use the right case, and then splice it to the cache.

In the end we will have the dentry use the right case and not the case that we used
for the search, namely, the original filename from the disk. That was the purpose
of the patch.

Eugen

> 
>>  		/*
>>  		 * somebody is likely to be still doing lookup for it;
>>  		 * wait for them to finish
>> @@ -2560,8 +2570,15 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
>>  	dput(dentry);
>>  	goto retry;
>>  }
>> -EXPORT_SYMBOL(d_alloc_parallel);
>> +EXPORT_SYMBOL(d_alloc_parallel_check_existing);
>>  
>> +struct dentry *d_alloc_parallel(struct dentry *parent,
>> +				const struct qstr *name,
>> +				wait_queue_head_t *wq)
>> +{
>> +	return d_alloc_parallel_check_existing(NULL, parent, name, wq);
>> +}
>> +EXPORT_SYMBOL(d_alloc_parallel);
>>  /*
>>   * - Unhash the dentry
>>   * - Retrieve and clear the waitqueue head in dentry
>> diff --git a/include/linux/dcache.h b/include/linux/dcache.h
>> index bf53e3894aae..6eb21a518cb0 100644
>> --- a/include/linux/dcache.h
>> +++ b/include/linux/dcache.h
>> @@ -232,6 +232,10 @@ extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
>>  extern struct dentry * d_alloc_anon(struct super_block *);
>>  extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
>>  					wait_queue_head_t *);
>> +extern struct dentry * d_alloc_parallel_check_existing(struct dentry *,
>> +						       struct dentry *,
>> +						       const struct qstr *,
>> +						       wait_queue_head_t *);
>>  extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
>>  extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
>>  extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent,
>
Gabriel Krisman Bertazi Aug. 21, 2024, 11:22 p.m. UTC | #3
Eugen Hristev <eugen.hristev@collabora.com> writes:

> Yes, but we cannot add another dentry for the same file with a different case.
> That would break everything about dentry lookups, etc.
> We need to have the one dentry in the cache which use the right case. Regardless of
> the case of the lookup.
>
> As Al Viro said here :
> https://lore.kernel.org/lkml/YVmyYP25kgGq9uEy@zeniv-ca.linux.org.uk/
> we cannot have parallel lookups for names that would compare as equals (two
> different dentries for the same file with different case).
>
> So yes, I return the same dentry-under-lookup, because that's the purpose of that
> search, return it, have it use the right case, and then splice it to the cache.

It is not changing the case of the returned dentry.  The patch simply
returns the same dentry you sent to d_alloc_parallel, which is then
spliced into the cache. Exactly as if you had issued d_splice_alias
directly.  You are just doing a hop in d_alloc_parallel and finding the
same dentry.

A quick test case below. You can print the ->d_name through
several methods. I'm doing it by reading /proc/self/cwd.

$ # In a case-insensitive filesystem
$ mkdir cf &&  chattr +F cf
$ mkdir cf/hello
$ echo 3 > /proc/sys/vm/drop_caches    # drop the dentry created above
$ cd cf/HELLO                          # provoke a case-inexact lookup.
$ readlink /proc/self/cwd

If we replaced the dentry with the disk name, it should
print <mnt>/cf/hello.  With your patch, it still prints <mnt>/cf/HELLO

Al,

Would it be acceptable to just change the dentry->d_name here in a new
flavor of d_add_ci used only by these filesystems? We are inside the
creation path, so the dentry has never been hashed.  Concurrent lookups
will be stuck in d_wait_lookup() until we are done and will never become
invalid after the change because the lookup was already done
case-insensitively, so they all match the same dentry, per-definition,
and we know there is no other matching dentries in the directory.  We'd
only need to be careful not to expose partial names to concurrent
parallel lookups.
Al Viro Aug. 22, 2024, 1:13 a.m. UTC | #4
On Wed, Aug 21, 2024 at 07:22:39PM -0400, Gabriel Krisman Bertazi wrote:

> Would it be acceptable to just change the dentry->d_name here in a new
> flavor of d_add_ci used only by these filesystems? We are inside the
> creation path, so the dentry has never been hashed.  Concurrent lookups
> will be stuck in d_wait_lookup() until we are done and will never become
> invalid after the change because the lookup was already done
> case-insensitively, so they all match the same dentry, per-definition,
> and we know there is no other matching dentries in the directory.  We'd
> only need to be careful not to expose partial names to concurrent
> parallel lookups.

*Ow*

->d_name stability rules are already convoluted as hell; that would make
them even more painful.

What locking are you going to use there?
Gabriel Krisman Bertazi Aug. 22, 2024, 5:25 p.m. UTC | #5
Al Viro <viro@zeniv.linux.org.uk> writes:

> On Wed, Aug 21, 2024 at 07:22:39PM -0400, Gabriel Krisman Bertazi wrote:
>
>> Would it be acceptable to just change the dentry->d_name here in a new
>> flavor of d_add_ci used only by these filesystems? We are inside the
>> creation path, so the dentry has never been hashed.  Concurrent lookups
>> will be stuck in d_wait_lookup() until we are done and will never become
>> invalid after the change because the lookup was already done
>> case-insensitively, so they all match the same dentry, per-definition,
>> and we know there is no other matching dentries in the directory.  We'd
>> only need to be careful not to expose partial names to concurrent
>> parallel lookups.
>
> *Ow*
>
> ->d_name stability rules are already convoluted as hell; that would make
> them even more painful.
>
> What locking are you going to use there?

Since we are in the ->d_lookup() during the rename, and we use the
dcache-insensitively for the filesystems that will do the rename, we
know there is nothing in the dcache and the dentry is still in the
parallel lookup table.  So we are not racing with a creation of the same
name in the same directory.  A parallel lookup will either find that
dentry (old or new name, doesn't matter) or not find anything, in case
it sees a partial ->d_name.  Therefore, the only possible problem is a
false negative/positive in parent->d_in_lookup_hash.

Can we extend the rename_lock seqlock protection that already exists in
d_alloc_parallel to include the d_in_lookup_hash walk?  d_add_ci then
acquires the rename_lock before writing ->d_name and d_alloc_parallel
will see it changed after iterating over d_in_lookup_hash, in case it
didn't find anything, and retry the entire sequence.

Case-inexact lookups are not supposed to be frequent. Most lookups
should be done in a case-exact way, so the extra acquisition of
rename_lock shouldn't create more contention on the rename_lock for the
regular path or for non-case-insensitive filesystems.  The overhead in
d_alloc_parallel is another read_seqretry() that is done only in the
case where the dentry is not found anywhere and should be created.
diff mbox series

Patch

diff --git a/fs/dcache.c b/fs/dcache.c
index a0a944fd3a1c..459a3d8b8bdb 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2049,8 +2049,9 @@  struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 		return found;
 	}
 	if (d_in_lookup(dentry)) {
-		found = d_alloc_parallel(dentry->d_parent, name,
-					dentry->d_wait);
+		found = d_alloc_parallel_check_existing(dentry,
+							dentry->d_parent, name,
+							dentry->d_wait);
 		if (IS_ERR(found) || !d_in_lookup(found)) {
 			iput(inode);
 			return found;
@@ -2452,9 +2453,10 @@  static void d_wait_lookup(struct dentry *dentry)
 	}
 }
 
-struct dentry *d_alloc_parallel(struct dentry *parent,
-				const struct qstr *name,
-				wait_queue_head_t *wq)
+struct dentry *d_alloc_parallel_check_existing(struct dentry *d_check,
+					       struct dentry *parent,
+					       const struct qstr *name,
+					       wait_queue_head_t *wq)
 {
 	unsigned int hash = name->hash;
 	struct hlist_bl_head *b = in_lookup_hash(parent, hash);
@@ -2523,6 +2525,14 @@  struct dentry *d_alloc_parallel(struct dentry *parent,
 		}
 
 		rcu_read_unlock();
+
+		/* if the entry we found is the same as the original we
+		 * are checking against, then return it
+		 */
+		if (d_check == dentry) {
+			dput(new);
+			return dentry;
+		}
 		/*
 		 * somebody is likely to be still doing lookup for it;
 		 * wait for them to finish
@@ -2560,8 +2570,15 @@  struct dentry *d_alloc_parallel(struct dentry *parent,
 	dput(dentry);
 	goto retry;
 }
-EXPORT_SYMBOL(d_alloc_parallel);
+EXPORT_SYMBOL(d_alloc_parallel_check_existing);
 
+struct dentry *d_alloc_parallel(struct dentry *parent,
+				const struct qstr *name,
+				wait_queue_head_t *wq)
+{
+	return d_alloc_parallel_check_existing(NULL, parent, name, wq);
+}
+EXPORT_SYMBOL(d_alloc_parallel);
 /*
  * - Unhash the dentry
  * - Retrieve and clear the waitqueue head in dentry
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index bf53e3894aae..6eb21a518cb0 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -232,6 +232,10 @@  extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
 extern struct dentry * d_alloc_anon(struct super_block *);
 extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
 					wait_queue_head_t *);
+extern struct dentry * d_alloc_parallel_check_existing(struct dentry *,
+						       struct dentry *,
+						       const struct qstr *,
+						       wait_queue_head_t *);
 extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
 extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
 extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent,