Message ID | 1480629741-18375-4-git-send-email-richard@nod.at |
---|---|
State | Deferred |
Delegated to: | Richard Weinberger |
Headers | show |
On Thu, Dec 01, 2016 at 11:02:18PM +0100, Richard Weinberger wrote: > This is the first step to support proper telldir/seekdir() > in UBIFS. > Let's report 64bit cookies in readdir(). The cookie is a combination > of the entry key plus the double hash value. Would it be possible to explain what that means in a little detail, for a ubifs-ignoramus? I'm just curious how it meets the requirements for nfs exports. --b. > > Signed-off-by: Richard Weinberger <richard@nod.at> > --- > fs/ubifs/dir.c | 46 +++++++++++++++++++++++++++++++------------ > fs/ubifs/key.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > fs/ubifs/ubifs.h | 1 + > 3 files changed, 94 insertions(+), 12 deletions(-) > > diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c > index 883b2fdf51df..3b8c08dad75b 100644 > --- a/fs/ubifs/dir.c > +++ b/fs/ubifs/dir.c > @@ -539,7 +539,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) > > dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, ctx->pos); > > - if (ctx->pos > UBIFS_S_KEY_HASH_MASK || ctx->pos == 2) > + if (ctx->pos == 2) > /* > * The directory was seek'ed to a senseless position or there > * are no more entries. > @@ -594,7 +594,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) > goto out; > } > > - ctx->pos = key_hash_flash(c, &dent->key); > + ctx->pos = key_get_dir_pos(c, file, dent); > file->private_data = dent; > } > > @@ -604,21 +604,43 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) > * The directory was seek'ed to and is now readdir'ed. > * Find the entry corresponding to @ctx->pos or the closest one. > */ > - dent_key_init_hash(c, &key, dir->i_ino, ctx->pos); > - fname_len(&nm) = 0; > - dent = ubifs_tnc_next_ent(c, &key, &nm); > - if (IS_ERR(dent)) { > - err = PTR_ERR(dent); > - goto out; > + dent_key_init_hash(c, &key, dir->i_ino, > + key_get_hash_from_dir_pos(c, file, ctx->pos)); > + > + if (key_want_short_hash(file)) { > + err = -ENOENT; > + } else { > + dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); > + if (!dent) { > + err = -ENOMEM; > + goto out; > + } > + > + err = ubifs_tnc_lookup_dh(c, &key, dent, > + key_get_cookie_from_dir_pos(c, ctx->pos)); > + } > + if (err) { > + kfree(dent); > + > + if (err < 0 && err != -ENOENT && err != -EOPNOTSUPP) > + goto out; > + > + fname_len(&nm) = 0; > + dent = ubifs_tnc_next_ent(c, &key, &nm); > + if (IS_ERR(dent)) { > + err = PTR_ERR(dent); > + goto out; > + } > } > - ctx->pos = key_hash_flash(c, &dent->key); > + > + ctx->pos = key_get_dir_pos(c, file, dent); > file->private_data = dent; > } > > while (1) { > - dbg_gen("feed '%s', ino %llu, new f_pos %#x", > + dbg_gen("feed '%s', ino %llu, new f_pos %#lx", > dent->name, (unsigned long long)le64_to_cpu(dent->inum), > - key_hash_flash(c, &dent->key)); > + (unsigned long)key_get_dir_pos(c, file, dent)); > ubifs_assert(le64_to_cpu(dent->ch.sqnum) > > ubifs_inode(dir)->creat_sqnum); > > @@ -656,7 +678,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) > } > > kfree(file->private_data); > - ctx->pos = key_hash_flash(c, &dent->key); > + ctx->pos = key_get_dir_pos(c, file, dent); > file->private_data = dent; > cond_resched(); > } > diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h > index 7547be512db2..2788e36ce832 100644 > --- a/fs/ubifs/key.h > +++ b/fs/ubifs/key.h > @@ -397,6 +397,65 @@ static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k) > } > > /** > + * key_want_short_hash - tests whether we can emit a 64bit hash or not. > + * @file: the file handle of the directory > + */ > +static inline bool key_want_short_hash(struct file *file) > +{ > + if (file->f_mode & FMODE_32BITHASH) > + return true; > + > + if (!(file->f_mode & FMODE_64BITHASH) && is_32bit_api()) > + return true; > + > + return false; > +} > + > +/** > + * key_dir_pos - compute a 64bit directory cookie for readdir() > + * @c: UBIFS file-system description object > + * @file: the file handle of the directory > + * @dent: the directory entry > + */ > +static inline loff_t key_get_dir_pos(const struct ubifs_info *c, > + struct file *file, > + struct ubifs_dent_node *dent) > +{ > + BUILD_BUG_ON(sizeof(loff_t) < 8); > + > + if (key_want_short_hash(file)) > + return key_hash_flash(c, &dent->key); > + > + return ((loff_t)key_hash_flash(c, &dent->key)) << UBIFS_DH_BITS | le32_to_cpu(dent->cookie); > +} > + > +/** > + * key_get_hash_from_dir_pos - extracts the flash key from a directory offset. > + * @c: UBIFS file-system description object > + * @file: the file handle of the directory > + * @pos: the directory offset provied by VFS > + */ > +static inline uint32_t key_get_hash_from_dir_pos(const struct ubifs_info *c, > + struct file *file, loff_t pos) > +{ > + if (key_want_short_hash(file)) > + return pos & UBIFS_S_KEY_HASH_MASK; > + > + return (pos >> UBIFS_DH_BITS) & UBIFS_S_KEY_HASH_MASK; > +} > + > +/** > + * key_get_cookie_from_dir_pos - extracts the double hash cookie from a directory offset. > + * @c: UBIFS file-system description object > + * @pos: the directory offset provied by VFS > + */ > +static inline uint32_t key_get_cookie_from_dir_pos(const struct ubifs_info *c, > + loff_t pos) > +{ > + return pos & UBIFS_DH_MASK; > +} > + > +/** > * key_block - get data block number. > * @c: UBIFS file-system description object > * @key: the key to get the block number from > diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h > index 12f3df3ced0e..0532a6f82b1d 100644 > --- a/fs/ubifs/ubifs.h > +++ b/fs/ubifs/ubifs.h > @@ -40,6 +40,7 @@ > #include <linux/xattr.h> > #include <linux/fscrypto.h> > #include <linux/random.h> > +#include <linux/compat.h> > #include "ubifs-media.h" > > /* Version of this UBIFS implementation */ > -- > 2.7.3 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
Bruce, On 29.12.2016 03:58, J. Bruce Fields wrote: > On Thu, Dec 01, 2016 at 11:02:18PM +0100, Richard Weinberger wrote: >> This is the first step to support proper telldir/seekdir() >> in UBIFS. >> Let's report 64bit cookies in readdir(). The cookie is a combination >> of the entry key plus the double hash value. > > Would it be possible to explain what that means in a little detail, for > a ubifs-ignoramus? > > I'm just curious how it meets the requirements for nfs exports. Traditionally UBIFS had 32bit readdir() cookies, ctx->pos was a 32bit integer. This integer is the 32bit key out the UBIFS index tree. In ->lookup(), UBIFS computes the r5 hash of the requested file name which will be used as search key. Since the chance is high to face a hash collision in the 32bit space, UBIFS also does a string compare to find the correct directory entry for the given file name. For NFS, and fscrypt, UBIFS has to offer a way to lookup a directory entry by a given cookie without knowing the file name. So, UBIFS has no chance to detect or handle a hash collision. To deal with that UBIFS uses a similar trick as ext4 does, it stores another unique identifier of the file name which can be used as cookie. While ext4 stores two 32bit hash values, therefore the name double hash, which will be combined to a single 64bit cookie, UBIFS stores additionally a 32bit random number which will be generated upon directory entry creation. Using the 32bit hash value and the 32bit nonce it can provide a 64bit cookie. Lookup via cookie works like a regular lookup but instead of comparing strings it compares the nonce values. That way UBIFS can provide a 64bit readdir() cookie which is required for NFS3. Thanks, //richard
On Thu, Dec 29, 2016 at 10:19:27AM +0100, Richard Weinberger wrote: > Bruce, > > On 29.12.2016 03:58, J. Bruce Fields wrote: > > On Thu, Dec 01, 2016 at 11:02:18PM +0100, Richard Weinberger wrote: > >> This is the first step to support proper telldir/seekdir() > >> in UBIFS. > >> Let's report 64bit cookies in readdir(). The cookie is a combination > >> of the entry key plus the double hash value. > > > > Would it be possible to explain what that means in a little detail, for > > a ubifs-ignoramus? > > > > I'm just curious how it meets the requirements for nfs exports. > > Traditionally UBIFS had 32bit readdir() cookies, ctx->pos was a 32bit > integer. > This integer is the 32bit key out the UBIFS index tree. > > In ->lookup(), UBIFS computes the r5 hash of the requested file name > which will be used as search key. Since the chance is high to face a hash > collision in the 32bit space, UBIFS also does a string compare > to find the correct directory entry for the given file name. > > For NFS, and fscrypt, UBIFS has to offer a way to lookup a directory > entry by a given cookie without knowing the file name. > So, UBIFS has no chance to detect or handle a hash collision. > > To deal with that UBIFS uses a similar trick as ext4 does, it stores > another unique identifier of the file name which can be used as cookie. > While ext4 stores two 32bit hash values, therefore the name double hash, > which will be combined to a single 64bit cookie, UBIFS stores additionally > a 32bit random number which will be generated upon directory entry creation. > Using the 32bit hash value and the 32bit nonce it can provide a 64bit > cookie. > > Lookup via cookie works like a regular lookup but instead of comparing > strings it compares the nonce values. > > That way UBIFS can provide a 64bit readdir() cookie which is required for NFS3. Sounds good. And if a matching entry isn't found (as in the case of a concurrent unlink), what happens? The answer must be the same as for ext4, but I've forgotten the details.... I guess it must find the next highest cookie (thinking of the cookie as a 64-bit integer of some kind) that exists in the directory. And that must be the same order that readdir normally returns entries in. --b.
Bruce, On 29.12.2016 16:34, J. Bruce Fields wrote: >> That way UBIFS can provide a 64bit readdir() cookie which is required for NFS3. > > Sounds good. And if a matching entry isn't found (as in the case of a > concurrent unlink), what happens? The answer must be the same as for > ext4, but I've forgotten the details.... I guess it must find the next > highest cookie (thinking of the cookie as a 64-bit integer of some kind) > that exists in the directory. And that must be the same order that > readdir normally returns entries in. If a 64bit cookie is not found, the lookup function returns -ENOENT. In UBIFS we cannot just select a higher or lower key (cookie in this case), since it is the B-tree key and would point to a completely different entry. So, in case of a concurrent unlink() one would succeed and one fail with -ENOENT. Unless I miss something that seems okay to me. Thanks, //richard
On Thu, Dec 29, 2016 at 04:49:54PM +0100, Richard Weinberger wrote: > Bruce, > > On 29.12.2016 16:34, J. Bruce Fields wrote: > >> That way UBIFS can provide a 64bit readdir() cookie which is required for NFS3. > > > > Sounds good. And if a matching entry isn't found (as in the case of a > > concurrent unlink), what happens? The answer must be the same as for > > ext4, but I've forgotten the details.... I guess it must find the next > > highest cookie (thinking of the cookie as a 64-bit integer of some kind) > > that exists in the directory. And that must be the same order that > > readdir normally returns entries in. > > If a 64bit cookie is not found, the lookup function returns -ENOENT. > In UBIFS we cannot just select a higher or lower key (cookie in this case), > since it is the B-tree key and would point to a completely different > entry. > > So, in case of a concurrent unlink() one would succeed and one fail with > -ENOENT. Unless I miss something that seems okay to me. Unlink takes (parent directory, name), not a directory cookie. The problem is concurrent unlink and nfs readdir. So: NFS server returns readdir result with cookie X Somebody unlinks the entry at X. NFS server gets readdir request with cookie X. Then the NFS client will get a spurious -ENOENT. I'm not sure how best to reproduce that.... Maybe: Create a directory on an nfs-exported filesystem with lots of entries. Start a loop (or loops?) renaming directory entries within the directory as fast as possible (or deleting and creating entries; I assume it's the same thing for our purposes). read the directory from an nfs client. I'm not sure how many entries is "lots".... Ideally you want a single read of the directory to require the client to make lots of READDIR requests to the server. You could help by running: echo 1024 >/proc/fs/nfsd/max_block_size before starting knfsd. That should force it to return no more than 1K of data in each READDIR reply. --b.
Bruce, On 29.12.2016 17:15, J. Bruce Fields wrote: > On Thu, Dec 29, 2016 at 04:49:54PM +0100, Richard Weinberger wrote: >> Bruce, >> >> On 29.12.2016 16:34, J. Bruce Fields wrote: >>>> That way UBIFS can provide a 64bit readdir() cookie which is required for NFS3. >>> >>> Sounds good. And if a matching entry isn't found (as in the case of a >>> concurrent unlink), what happens? The answer must be the same as for >>> ext4, but I've forgotten the details.... I guess it must find the next >>> highest cookie (thinking of the cookie as a 64-bit integer of some kind) >>> that exists in the directory. And that must be the same order that >>> readdir normally returns entries in. >> >> If a 64bit cookie is not found, the lookup function returns -ENOENT. >> In UBIFS we cannot just select a higher or lower key (cookie in this case), >> since it is the B-tree key and would point to a completely different >> entry. >> >> So, in case of a concurrent unlink() one would succeed and one fail with >> -ENOENT. Unless I miss something that seems okay to me. > > Unlink takes (parent directory, name), not a directory cookie. > > The problem is concurrent unlink and nfs readdir. So: > > NFS server returns readdir result with cookie X > > Somebody unlinks the entry at X. > > NFS server gets readdir request with cookie X. > > Then the NFS client will get a spurious -ENOENT. Ah yes. Sorry I misunderstood your question. UBIFS readdir() address this already, if you ask it to readdir() from pos X and X is not present it will jump to the next best entry X'. UBIFS does so since ever. Thanks, //richard
On Thu, Dec 29, 2016 at 05:36:35PM +0100, Richard Weinberger wrote: > Bruce, > > On 29.12.2016 17:15, J. Bruce Fields wrote: > > On Thu, Dec 29, 2016 at 04:49:54PM +0100, Richard Weinberger wrote: > >> Bruce, > >> > >> On 29.12.2016 16:34, J. Bruce Fields wrote: > >>>> That way UBIFS can provide a 64bit readdir() cookie which is required for NFS3. > >>> > >>> Sounds good. And if a matching entry isn't found (as in the case of a > >>> concurrent unlink), what happens? The answer must be the same as for > >>> ext4, but I've forgotten the details.... I guess it must find the next > >>> highest cookie (thinking of the cookie as a 64-bit integer of some kind) > >>> that exists in the directory. And that must be the same order that > >>> readdir normally returns entries in. > >> > >> If a 64bit cookie is not found, the lookup function returns -ENOENT. > >> In UBIFS we cannot just select a higher or lower key (cookie in this case), > >> since it is the B-tree key and would point to a completely different > >> entry. > >> > >> So, in case of a concurrent unlink() one would succeed and one fail with > >> -ENOENT. Unless I miss something that seems okay to me. > > > > Unlink takes (parent directory, name), not a directory cookie. > > > > The problem is concurrent unlink and nfs readdir. So: > > > > NFS server returns readdir result with cookie X > > > > Somebody unlinks the entry at X. > > > > NFS server gets readdir request with cookie X. > > > > Then the NFS client will get a spurious -ENOENT. > > Ah yes. Sorry I misunderstood your question. > UBIFS readdir() address this already, if you ask it to readdir() > from pos X and X is not present it will jump to the next best entry X'. > UBIFS does so since ever. OK, good. So the random nonce is stored with the entry, and the hash you can always recalculate from the filename, so if you return entries in nonce+hash order, then you can keep that order stable even across deletes and renames. --b.
On 29.12.2016 17:59, J. Bruce Fields wrote: > OK, good. So the random nonce is stored with the entry, and the hash > you can always recalculate from the filename, so if you return entries > in nonce+hash order, then you can keep that order stable even across > deletes and renames. Yes. Thanks, //richard
On Thu, Dec 29, 2016 at 06:05:54PM +0100, Richard Weinberger wrote: > On 29.12.2016 17:59, J. Bruce Fields wrote: > > OK, good. So the random nonce is stored with the entry, and the hash > > you can always recalculate from the filename, so if you return entries > > in nonce+hash order, then you can keep that order stable even across > > deletes and renames. > > Yes. Thanks, feel free to add a Reviewed-by: J. Bruce Fields <bfields@redhat.com> to this and 6/6. --b.
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 883b2fdf51df..3b8c08dad75b 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -539,7 +539,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, ctx->pos); - if (ctx->pos > UBIFS_S_KEY_HASH_MASK || ctx->pos == 2) + if (ctx->pos == 2) /* * The directory was seek'ed to a senseless position or there * are no more entries. @@ -594,7 +594,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) goto out; } - ctx->pos = key_hash_flash(c, &dent->key); + ctx->pos = key_get_dir_pos(c, file, dent); file->private_data = dent; } @@ -604,21 +604,43 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) * The directory was seek'ed to and is now readdir'ed. * Find the entry corresponding to @ctx->pos or the closest one. */ - dent_key_init_hash(c, &key, dir->i_ino, ctx->pos); - fname_len(&nm) = 0; - dent = ubifs_tnc_next_ent(c, &key, &nm); - if (IS_ERR(dent)) { - err = PTR_ERR(dent); - goto out; + dent_key_init_hash(c, &key, dir->i_ino, + key_get_hash_from_dir_pos(c, file, ctx->pos)); + + if (key_want_short_hash(file)) { + err = -ENOENT; + } else { + dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); + if (!dent) { + err = -ENOMEM; + goto out; + } + + err = ubifs_tnc_lookup_dh(c, &key, dent, + key_get_cookie_from_dir_pos(c, ctx->pos)); + } + if (err) { + kfree(dent); + + if (err < 0 && err != -ENOENT && err != -EOPNOTSUPP) + goto out; + + fname_len(&nm) = 0; + dent = ubifs_tnc_next_ent(c, &key, &nm); + if (IS_ERR(dent)) { + err = PTR_ERR(dent); + goto out; + } } - ctx->pos = key_hash_flash(c, &dent->key); + + ctx->pos = key_get_dir_pos(c, file, dent); file->private_data = dent; } while (1) { - dbg_gen("feed '%s', ino %llu, new f_pos %#x", + dbg_gen("feed '%s', ino %llu, new f_pos %#lx", dent->name, (unsigned long long)le64_to_cpu(dent->inum), - key_hash_flash(c, &dent->key)); + (unsigned long)key_get_dir_pos(c, file, dent)); ubifs_assert(le64_to_cpu(dent->ch.sqnum) > ubifs_inode(dir)->creat_sqnum); @@ -656,7 +678,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) } kfree(file->private_data); - ctx->pos = key_hash_flash(c, &dent->key); + ctx->pos = key_get_dir_pos(c, file, dent); file->private_data = dent; cond_resched(); } diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index 7547be512db2..2788e36ce832 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -397,6 +397,65 @@ static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k) } /** + * key_want_short_hash - tests whether we can emit a 64bit hash or not. + * @file: the file handle of the directory + */ +static inline bool key_want_short_hash(struct file *file) +{ + if (file->f_mode & FMODE_32BITHASH) + return true; + + if (!(file->f_mode & FMODE_64BITHASH) && is_32bit_api()) + return true; + + return false; +} + +/** + * key_dir_pos - compute a 64bit directory cookie for readdir() + * @c: UBIFS file-system description object + * @file: the file handle of the directory + * @dent: the directory entry + */ +static inline loff_t key_get_dir_pos(const struct ubifs_info *c, + struct file *file, + struct ubifs_dent_node *dent) +{ + BUILD_BUG_ON(sizeof(loff_t) < 8); + + if (key_want_short_hash(file)) + return key_hash_flash(c, &dent->key); + + return ((loff_t)key_hash_flash(c, &dent->key)) << UBIFS_DH_BITS | le32_to_cpu(dent->cookie); +} + +/** + * key_get_hash_from_dir_pos - extracts the flash key from a directory offset. + * @c: UBIFS file-system description object + * @file: the file handle of the directory + * @pos: the directory offset provied by VFS + */ +static inline uint32_t key_get_hash_from_dir_pos(const struct ubifs_info *c, + struct file *file, loff_t pos) +{ + if (key_want_short_hash(file)) + return pos & UBIFS_S_KEY_HASH_MASK; + + return (pos >> UBIFS_DH_BITS) & UBIFS_S_KEY_HASH_MASK; +} + +/** + * key_get_cookie_from_dir_pos - extracts the double hash cookie from a directory offset. + * @c: UBIFS file-system description object + * @pos: the directory offset provied by VFS + */ +static inline uint32_t key_get_cookie_from_dir_pos(const struct ubifs_info *c, + loff_t pos) +{ + return pos & UBIFS_DH_MASK; +} + +/** * key_block - get data block number. * @c: UBIFS file-system description object * @key: the key to get the block number from diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 12f3df3ced0e..0532a6f82b1d 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -40,6 +40,7 @@ #include <linux/xattr.h> #include <linux/fscrypto.h> #include <linux/random.h> +#include <linux/compat.h> #include "ubifs-media.h" /* Version of this UBIFS implementation */
This is the first step to support proper telldir/seekdir() in UBIFS. Let's report 64bit cookies in readdir(). The cookie is a combination of the entry key plus the double hash value. Signed-off-by: Richard Weinberger <richard@nod.at> --- fs/ubifs/dir.c | 46 +++++++++++++++++++++++++++++++------------ fs/ubifs/key.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ubifs/ubifs.h | 1 + 3 files changed, 94 insertions(+), 12 deletions(-)