Message ID | 20200414040030.1802884-5-ira.weiny@intel.com |
---|---|
State | New |
Headers | show |
Series | Enable ext4 support for per-file/directory DAX operations | expand |
On Mon 13-04-20 21:00:26, ira.weiny@intel.com wrote: > From: Ira Weiny <ira.weiny@intel.com> > > Add a flag to preserve FS_XFLAG_DAX in the ext4 inode. > > Set the flag to be user visible and changeable. Set the flag to be > inherited. Allow applications to change the flag at any time. > > Finally, on regular files, flag the inode to not be cached to facilitate > changing S_DAX on the next creation of the inode. > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > --- > fs/ext4/ext4.h | 13 +++++++++---- > fs/ext4/ioctl.c | 21 ++++++++++++++++++++- > 2 files changed, 29 insertions(+), 5 deletions(-) > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index 61b37a052052..434021fcec88 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -415,13 +415,16 @@ struct flex_groups { > #define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */ > #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ > #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ > + > +#define EXT4_DAX_FL 0x00800000 /* Inode is DAX */ > + You seem to be using somewhat older kernel... EXT4_EOFBLOCKS_FL doesn't exist anymore (but still it's good to leave it reserved for some time so the value you've chosen is OK). > @@ -813,6 +818,17 @@ static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg) > return error; > } > > +static void ext4_dax_dontcache(struct inode *inode, unsigned int flags) > +{ > + struct ext4_inode_info *ei = EXT4_I(inode); > + > + if (S_ISDIR(inode->i_mode)) > + return; > + > + if ((ei->i_flags ^ flags) == EXT4_DAX_FL) > + inode->i_state |= I_DONTCACHE; > +} > + You probably want to use the function you've introduced in the XFS series here... Honza
On Wed, Apr 15, 2020 at 02:08:46PM +0200, Jan Kara wrote: > On Mon 13-04-20 21:00:26, ira.weiny@intel.com wrote: > > From: Ira Weiny <ira.weiny@intel.com> > > > > Add a flag to preserve FS_XFLAG_DAX in the ext4 inode. > > > > Set the flag to be user visible and changeable. Set the flag to be > > inherited. Allow applications to change the flag at any time. > > > > Finally, on regular files, flag the inode to not be cached to facilitate > > changing S_DAX on the next creation of the inode. > > > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > > --- > > fs/ext4/ext4.h | 13 +++++++++---- > > fs/ext4/ioctl.c | 21 ++++++++++++++++++++- > > 2 files changed, 29 insertions(+), 5 deletions(-) > > > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > > index 61b37a052052..434021fcec88 100644 > > --- a/fs/ext4/ext4.h > > +++ b/fs/ext4/ext4.h > > @@ -415,13 +415,16 @@ struct flex_groups { > > #define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */ > > #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ > > #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ > > + > > +#define EXT4_DAX_FL 0x00800000 /* Inode is DAX */ > > + > > You seem to be using somewhat older kernel... EXT4_EOFBLOCKS_FL doesn't > exist anymore (but still it's good to leave it reserved for some time so > the value you've chosen is OK). I'm on top of 5.6 released. Did this get removed for 5.7? I've heard there are some boot issues with 5.7-rc1 so I'm holding out for rc2. > > > @@ -813,6 +818,17 @@ static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg) > > return error; > > } > > > > +static void ext4_dax_dontcache(struct inode *inode, unsigned int flags) > > +{ > > + struct ext4_inode_info *ei = EXT4_I(inode); > > + > > + if (S_ISDIR(inode->i_mode)) > > + return; > > + > > + if ((ei->i_flags ^ flags) == EXT4_DAX_FL) > > + inode->i_state |= I_DONTCACHE; > > +} > > + > > You probably want to use the function you've introduced in the XFS series > here... you mean: flag_inode_dontcache() ??? Yes that is done. I sent this prior to v8 (where that was added) of the other series... Ira > > Honza > -- > Jan Kara <jack@suse.com> > SUSE Labs, CR
On Wed 15-04-20 13:39:25, Ira Weiny wrote: > > > @@ -813,6 +818,17 @@ static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg) > > > return error; > > > } > > > > > > +static void ext4_dax_dontcache(struct inode *inode, unsigned int flags) > > > +{ > > > + struct ext4_inode_info *ei = EXT4_I(inode); > > > + > > > + if (S_ISDIR(inode->i_mode)) > > > + return; > > > + > > > + if ((ei->i_flags ^ flags) == EXT4_DAX_FL) > > > + inode->i_state |= I_DONTCACHE; > > > +} > > > + > > > > You probably want to use the function you've introduced in the XFS series > > here... > > you mean: > > flag_inode_dontcache() > ??? Yeah, that's what I meant. > Yes that is done. I sent this prior to v8 (where that was added) of the other > series... Yep, I thought that was the case but I wanted to mention it as a reminder. Honza
On Mon, Apr 13, 2020 at 09:00:26PM -0700, ira.weiny@intel.com wrote: > From: Ira Weiny <ira.weiny@intel.com> > > Add a flag to preserve FS_XFLAG_DAX in the ext4 inode. > > Set the flag to be user visible and changeable. Set the flag to be > inherited. Allow applications to change the flag at any time. > > Finally, on regular files, flag the inode to not be cached to facilitate > changing S_DAX on the next creation of the inode. > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > --- > fs/ext4/ext4.h | 13 +++++++++---- > fs/ext4/ioctl.c | 21 ++++++++++++++++++++- > 2 files changed, 29 insertions(+), 5 deletions(-) > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index 61b37a052052..434021fcec88 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -415,13 +415,16 @@ struct flex_groups { > #define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */ > #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ > #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ > + > +#define EXT4_DAX_FL 0x00800000 /* Inode is DAX */ Sooo, fun fact about ext4 vs. the world-- The GETFLAGS/SETFLAGS ioctl, since it came from ext2, shares the same flag values as the ondisk inode flags in ext*. Therefore, each of these EXT4_[whatever]_FL values are supposed to have a FS_[whatever]_FL equivalent in include/uapi/linux/fs.h. (Note that the "[whatever]" is a straight translation since the same uapi header also defines the FS_XFLAG_[xfswhatever] flag values; ignore those.) Evidently, FS_NOCOW_FL already took 0x800000, but ext4.h was never updated to note that the value was taken. I think Ted might be inclined to reserve the ondisk inode bit just in case ext4 ever does support copy on write, though that's his call. :) Long story short - can you use 0x1000000 for this instead, and add the corresponding value to the uapi fs.h? I guess that also means that we can change FS_XFLAG_DAX (in the form of FS_DAX_FL in FSSETFLAGS) after that. --D > + > #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ > #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ > #define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */ > #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ > > -#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */ > -#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */ > +#define EXT4_FL_USER_VISIBLE 0x70DBDFFF /* User visible flags */ > +#define EXT4_FL_USER_MODIFIABLE 0x60CBC0FF /* User modifiable flags */ > > /* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */ > #define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \ > @@ -429,14 +432,16 @@ struct flex_groups { > EXT4_APPEND_FL | \ > EXT4_NODUMP_FL | \ > EXT4_NOATIME_FL | \ > - EXT4_PROJINHERIT_FL) > + EXT4_PROJINHERIT_FL | \ > + EXT4_DAX_FL) > > /* Flags that should be inherited by new inodes from their parent. */ > #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ > EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\ > EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ > EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\ > - EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL) > + EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\ > + EXT4_DAX_FL) > > /* Flags that are appropriate for regular files (all but dir-specific ones). */ > #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\ > diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c > index ee3401a32e79..ca07d5086f03 100644 > --- a/fs/ext4/ioctl.c > +++ b/fs/ext4/ioctl.c > @@ -539,12 +539,15 @@ static inline __u32 ext4_iflags_to_xflags(unsigned long iflags) > xflags |= FS_XFLAG_NOATIME; > if (iflags & EXT4_PROJINHERIT_FL) > xflags |= FS_XFLAG_PROJINHERIT; > + if (iflags & EXT4_DAX_FL) > + xflags |= FS_XFLAG_DAX; > return xflags; > } > > #define EXT4_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \ > FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \ > - FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT) > + FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT | \ > + FS_XFLAG_DAX) > > /* Transfer xflags flags to internal */ > static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) > @@ -563,6 +566,8 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) > iflags |= EXT4_NOATIME_FL; > if (xflags & FS_XFLAG_PROJINHERIT) > iflags |= EXT4_PROJINHERIT_FL; > + if (xflags & FS_XFLAG_DAX) > + iflags |= EXT4_DAX_FL; > > return iflags; > } > @@ -813,6 +818,17 @@ static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg) > return error; > } > > +static void ext4_dax_dontcache(struct inode *inode, unsigned int flags) > +{ > + struct ext4_inode_info *ei = EXT4_I(inode); > + > + if (S_ISDIR(inode->i_mode)) > + return; > + > + if ((ei->i_flags ^ flags) == EXT4_DAX_FL) > + inode->i_state |= I_DONTCACHE; > +} > + > long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > { > struct inode *inode = file_inode(filp); > @@ -1273,6 +1289,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > return err; > > inode_lock(inode); > + > + ext4_dax_dontcache(inode, flags); > + > ext4_fill_fsxattr(inode, &old_fa); > err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa); > if (err) > -- > 2.25.1 >
On Wed, Apr 15, 2020 at 01:39:25PM -0700, Ira Weiny wrote: > > I'm on top of 5.6 released. Did this get removed for 5.7? I've heard there are > some boot issues with 5.7-rc1 so I'm holding out for rc2. Yes, it got removed in 5.7-rc1 in commit 4337ecd1fe99. The boot issues with 5.7-rc1 is why ext4.git tree is now based off of v5.7-rc1-35-g00086336a8d9: Merge tag 'efi-urgent-2020-04-15'.... You might want to see if 00086336a8d9 works for you (and if not, let the x86 and/or efi folks know). - Ted
On Thu, Apr 16, 2020 at 09:25:04AM -0700, Darrick J. Wong wrote: > On Mon, Apr 13, 2020 at 09:00:26PM -0700, ira.weiny@intel.com wrote: > > From: Ira Weiny <ira.weiny@intel.com> > > > > Add a flag to preserve FS_XFLAG_DAX in the ext4 inode. > > > > Set the flag to be user visible and changeable. Set the flag to be > > inherited. Allow applications to change the flag at any time. > > > > Finally, on regular files, flag the inode to not be cached to facilitate > > changing S_DAX on the next creation of the inode. > > > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > > --- > > fs/ext4/ext4.h | 13 +++++++++---- > > fs/ext4/ioctl.c | 21 ++++++++++++++++++++- > > 2 files changed, 29 insertions(+), 5 deletions(-) > > > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > > index 61b37a052052..434021fcec88 100644 > > --- a/fs/ext4/ext4.h > > +++ b/fs/ext4/ext4.h > > @@ -415,13 +415,16 @@ struct flex_groups { > > #define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */ > > #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ > > #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ > > + > > +#define EXT4_DAX_FL 0x00800000 /* Inode is DAX */ > > Sooo, fun fact about ext4 vs. the world-- > > The GETFLAGS/SETFLAGS ioctl, since it came from ext2, shares the same > flag values as the ondisk inode flags in ext*. Therefore, each of these > EXT4_[whatever]_FL values are supposed to have a FS_[whatever]_FL > equivalent in include/uapi/linux/fs.h. Interesting... > > (Note that the "[whatever]" is a straight translation since the same > uapi header also defines the FS_XFLAG_[xfswhatever] flag values; ignore > those.) > > Evidently, FS_NOCOW_FL already took 0x800000, but ext4.h was never > updated to note that the value was taken. I think Ted might be inclined > to reserve the ondisk inode bit just in case ext4 ever does support copy > on write, though that's his call. :) Seems like I should change this... And I did not realize I was inherently changing a bit definition which was exposed to other FS's... > > Long story short - can you use 0x1000000 for this instead, and add the > corresponding value to the uapi fs.h? I guess that also means that we > can change FS_XFLAG_DAX (in the form of FS_DAX_FL in FSSETFLAGS) after > that. :-/ Are there any potential users of FS_XFLAG_DAX now? From what it looks like, changing FS_XFLAG_DAX to FS_DAX_FL would be pretty straight forward. Just to be sure, looks like XFS converts the FS_[xxx]_FL to FS_XFLAGS_[xxx] in xfs_merge_ioc_xflags()? But it does not look like all the FS_[xxx]_FL flags are converted. Is is that XFS does not support those options? Or is it depending on the VFS layer for some of them? Ira > > --D > > > + > > #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ > > #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ > > #define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */ > > #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ > > > > -#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */ > > -#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */ > > +#define EXT4_FL_USER_VISIBLE 0x70DBDFFF /* User visible flags */ > > +#define EXT4_FL_USER_MODIFIABLE 0x60CBC0FF /* User modifiable flags */ > > > > /* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */ > > #define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \ > > @@ -429,14 +432,16 @@ struct flex_groups { > > EXT4_APPEND_FL | \ > > EXT4_NODUMP_FL | \ > > EXT4_NOATIME_FL | \ > > - EXT4_PROJINHERIT_FL) > > + EXT4_PROJINHERIT_FL | \ > > + EXT4_DAX_FL) > > > > /* Flags that should be inherited by new inodes from their parent. */ > > #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ > > EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\ > > EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ > > EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\ > > - EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL) > > + EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\ > > + EXT4_DAX_FL) > > > > /* Flags that are appropriate for regular files (all but dir-specific ones). */ > > #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\ > > diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c > > index ee3401a32e79..ca07d5086f03 100644 > > --- a/fs/ext4/ioctl.c > > +++ b/fs/ext4/ioctl.c > > @@ -539,12 +539,15 @@ static inline __u32 ext4_iflags_to_xflags(unsigned long iflags) > > xflags |= FS_XFLAG_NOATIME; > > if (iflags & EXT4_PROJINHERIT_FL) > > xflags |= FS_XFLAG_PROJINHERIT; > > + if (iflags & EXT4_DAX_FL) > > + xflags |= FS_XFLAG_DAX; > > return xflags; > > } > > > > #define EXT4_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \ > > FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \ > > - FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT) > > + FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT | \ > > + FS_XFLAG_DAX) > > > > /* Transfer xflags flags to internal */ > > static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) > > @@ -563,6 +566,8 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) > > iflags |= EXT4_NOATIME_FL; > > if (xflags & FS_XFLAG_PROJINHERIT) > > iflags |= EXT4_PROJINHERIT_FL; > > + if (xflags & FS_XFLAG_DAX) > > + iflags |= EXT4_DAX_FL; > > > > return iflags; > > } > > @@ -813,6 +818,17 @@ static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg) > > return error; > > } > > > > +static void ext4_dax_dontcache(struct inode *inode, unsigned int flags) > > +{ > > + struct ext4_inode_info *ei = EXT4_I(inode); > > + > > + if (S_ISDIR(inode->i_mode)) > > + return; > > + > > + if ((ei->i_flags ^ flags) == EXT4_DAX_FL) > > + inode->i_state |= I_DONTCACHE; > > +} > > + > > long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > > { > > struct inode *inode = file_inode(filp); > > @@ -1273,6 +1289,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > > return err; > > > > inode_lock(inode); > > + > > + ext4_dax_dontcache(inode, flags); > > + > > ext4_fill_fsxattr(inode, &old_fa); > > err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa); > > if (err) > > -- > > 2.25.1 > >
On Thu, Apr 16, 2020 at 03:33:27PM -0700, Ira Weiny wrote: > On Thu, Apr 16, 2020 at 09:25:04AM -0700, Darrick J. Wong wrote: > > On Mon, Apr 13, 2020 at 09:00:26PM -0700, ira.weiny@intel.com wrote: > > > From: Ira Weiny <ira.weiny@intel.com> > > > > > > Add a flag to preserve FS_XFLAG_DAX in the ext4 inode. > > > > > > Set the flag to be user visible and changeable. Set the flag to be > > > inherited. Allow applications to change the flag at any time. > > > > > > Finally, on regular files, flag the inode to not be cached to facilitate > > > changing S_DAX on the next creation of the inode. > > > > > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > > > --- > > > fs/ext4/ext4.h | 13 +++++++++---- > > > fs/ext4/ioctl.c | 21 ++++++++++++++++++++- > > > 2 files changed, 29 insertions(+), 5 deletions(-) > > > > > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > > > index 61b37a052052..434021fcec88 100644 > > > --- a/fs/ext4/ext4.h > > > +++ b/fs/ext4/ext4.h > > > @@ -415,13 +415,16 @@ struct flex_groups { > > > #define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */ > > > #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ > > > #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ > > > + > > > +#define EXT4_DAX_FL 0x00800000 /* Inode is DAX */ > > > > Sooo, fun fact about ext4 vs. the world-- > > > > The GETFLAGS/SETFLAGS ioctl, since it came from ext2, shares the same > > flag values as the ondisk inode flags in ext*. Therefore, each of these > > EXT4_[whatever]_FL values are supposed to have a FS_[whatever]_FL > > equivalent in include/uapi/linux/fs.h. > > Interesting... > > > > > (Note that the "[whatever]" is a straight translation since the same > > uapi header also defines the FS_XFLAG_[xfswhatever] flag values; ignore > > those.) > > > > Evidently, FS_NOCOW_FL already took 0x800000, but ext4.h was never > > updated to note that the value was taken. I think Ted might be inclined > > to reserve the ondisk inode bit just in case ext4 ever does support copy > > on write, though that's his call. :) > > Seems like I should change this... And I did not realize I was inherently > changing a bit definition which was exposed to other FS's... <nod> This whole thing is a mess, particularly now that we have two vfs ioctls to set per-fs inode attributes, both of which were inherited from other filesystems... :( > > > > Long story short - can you use 0x1000000 for this instead, and add the > > corresponding value to the uapi fs.h? I guess that also means that we > > can change FS_XFLAG_DAX (in the form of FS_DAX_FL in FSSETFLAGS) after > > that. > > :-/ > > Are there any potential users of FS_XFLAG_DAX now? Yes, it's in the userspace ABI so we can't get rid of it. (FWIW there are several flags that exist in both FS_XFLAG_* and FS_*_FL form.) > From what it looks like, changing FS_XFLAG_DAX to FS_DAX_FL would be pretty > straight forward. Just to be sure, looks like XFS converts the FS_[xxx]_FL to > FS_XFLAGS_[xxx] in xfs_merge_ioc_xflags()? But it does not look like all the > FS_[xxx]_FL flags are converted. Is is that XFS does not support those > options? Or is it depending on the VFS layer for some of them? XFS doesn't support most of the FS_*_FL flags. --D > Ira > > > > > --D > > > > > + > > > #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ > > > #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ > > > #define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */ > > > #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ > > > > > > -#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */ > > > -#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */ > > > +#define EXT4_FL_USER_VISIBLE 0x70DBDFFF /* User visible flags */ > > > +#define EXT4_FL_USER_MODIFIABLE 0x60CBC0FF /* User modifiable flags */ > > > > > > /* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */ > > > #define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \ > > > @@ -429,14 +432,16 @@ struct flex_groups { > > > EXT4_APPEND_FL | \ > > > EXT4_NODUMP_FL | \ > > > EXT4_NOATIME_FL | \ > > > - EXT4_PROJINHERIT_FL) > > > + EXT4_PROJINHERIT_FL | \ > > > + EXT4_DAX_FL) > > > > > > /* Flags that should be inherited by new inodes from their parent. */ > > > #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ > > > EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\ > > > EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ > > > EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\ > > > - EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL) > > > + EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\ > > > + EXT4_DAX_FL) > > > > > > /* Flags that are appropriate for regular files (all but dir-specific ones). */ > > > #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\ > > > diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c > > > index ee3401a32e79..ca07d5086f03 100644 > > > --- a/fs/ext4/ioctl.c > > > +++ b/fs/ext4/ioctl.c > > > @@ -539,12 +539,15 @@ static inline __u32 ext4_iflags_to_xflags(unsigned long iflags) > > > xflags |= FS_XFLAG_NOATIME; > > > if (iflags & EXT4_PROJINHERIT_FL) > > > xflags |= FS_XFLAG_PROJINHERIT; > > > + if (iflags & EXT4_DAX_FL) > > > + xflags |= FS_XFLAG_DAX; > > > return xflags; > > > } > > > > > > #define EXT4_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \ > > > FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \ > > > - FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT) > > > + FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT | \ > > > + FS_XFLAG_DAX) > > > > > > /* Transfer xflags flags to internal */ > > > static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) > > > @@ -563,6 +566,8 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) > > > iflags |= EXT4_NOATIME_FL; > > > if (xflags & FS_XFLAG_PROJINHERIT) > > > iflags |= EXT4_PROJINHERIT_FL; > > > + if (xflags & FS_XFLAG_DAX) > > > + iflags |= EXT4_DAX_FL; > > > > > > return iflags; > > > } > > > @@ -813,6 +818,17 @@ static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg) > > > return error; > > > } > > > > > > +static void ext4_dax_dontcache(struct inode *inode, unsigned int flags) > > > +{ > > > + struct ext4_inode_info *ei = EXT4_I(inode); > > > + > > > + if (S_ISDIR(inode->i_mode)) > > > + return; > > > + > > > + if ((ei->i_flags ^ flags) == EXT4_DAX_FL) > > > + inode->i_state |= I_DONTCACHE; > > > +} > > > + > > > long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > > > { > > > struct inode *inode = file_inode(filp); > > > @@ -1273,6 +1289,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > > > return err; > > > > > > inode_lock(inode); > > > + > > > + ext4_dax_dontcache(inode, flags); > > > + > > > ext4_fill_fsxattr(inode, &old_fa); > > > err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa); > > > if (err) > > > -- > > > 2.25.1 > > >
On Thu, Apr 16, 2020 at 03:49:37PM -0700, Darrick J. Wong wrote: > On Thu, Apr 16, 2020 at 03:33:27PM -0700, Ira Weiny wrote: > > On Thu, Apr 16, 2020 at 09:25:04AM -0700, Darrick J. Wong wrote: > > > On Mon, Apr 13, 2020 at 09:00:26PM -0700, ira.weiny@intel.com wrote: > > > > From: Ira Weiny <ira.weiny@intel.com> > > > > > > > > Add a flag to preserve FS_XFLAG_DAX in the ext4 inode. > > > > > > > > Set the flag to be user visible and changeable. Set the flag to be > > > > inherited. Allow applications to change the flag at any time. > > > > > > > > Finally, on regular files, flag the inode to not be cached to facilitate > > > > changing S_DAX on the next creation of the inode. > > > > > > > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > > > > --- > > > > fs/ext4/ext4.h | 13 +++++++++---- > > > > fs/ext4/ioctl.c | 21 ++++++++++++++++++++- > > > > 2 files changed, 29 insertions(+), 5 deletions(-) > > > > > > > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > > > > index 61b37a052052..434021fcec88 100644 > > > > --- a/fs/ext4/ext4.h > > > > +++ b/fs/ext4/ext4.h > > > > @@ -415,13 +415,16 @@ struct flex_groups { > > > > #define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */ > > > > #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ > > > > #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ > > > > + > > > > +#define EXT4_DAX_FL 0x00800000 /* Inode is DAX */ > > > > > > Sooo, fun fact about ext4 vs. the world-- > > > > > > The GETFLAGS/SETFLAGS ioctl, since it came from ext2, shares the same > > > flag values as the ondisk inode flags in ext*. Therefore, each of these > > > EXT4_[whatever]_FL values are supposed to have a FS_[whatever]_FL > > > equivalent in include/uapi/linux/fs.h. > > > > Interesting... > > > > > > > > (Note that the "[whatever]" is a straight translation since the same > > > uapi header also defines the FS_XFLAG_[xfswhatever] flag values; ignore > > > those.) > > > > > > Evidently, FS_NOCOW_FL already took 0x800000, but ext4.h was never > > > updated to note that the value was taken. I think Ted might be inclined > > > to reserve the ondisk inode bit just in case ext4 ever does support copy > > > on write, though that's his call. :) > > > > Seems like I should change this... And I did not realize I was inherently > > changing a bit definition which was exposed to other FS's... > > <nod> This whole thing is a mess, particularly now that we have two vfs > ioctls to set per-fs inode attributes, both of which were inherited from > other filesystems... :( > Ok I've changed it. > > > > > > > Long story short - can you use 0x1000000 for this instead, and add the > > > corresponding value to the uapi fs.h? I guess that also means that we > > > can change FS_XFLAG_DAX (in the form of FS_DAX_FL in FSSETFLAGS) after > > > that. > > > > :-/ > > > > Are there any potential users of FS_XFLAG_DAX now? > > Yes, it's in the userspace ABI so we can't get rid of it. > > (FWIW there are several flags that exist in both FS_XFLAG_* and FS_*_FL > form.) > > > From what it looks like, changing FS_XFLAG_DAX to FS_DAX_FL would be pretty > > straight forward. Just to be sure, looks like XFS converts the FS_[xxx]_FL to > > FS_XFLAGS_[xxx] in xfs_merge_ioc_xflags()? But it does not look like all the > > FS_[xxx]_FL flags are converted. Is is that XFS does not support those > > options? Or is it depending on the VFS layer for some of them? > > XFS doesn't support most of the FS_*_FL flags. If FS_XFLAG_DAX needs to continue to be user visible I think we need to keep that flag and we should not expose the EXT4_DAX_FL flag... I think that works for XFS. But for ext4 it looks like EXT4_FL_XFLAG_VISIBLE was intended to be used for [GET|SET]XATTR where EXT4_FL_USER_VISIBLE was intended to for [GET|SET]FLAGS... But if I don't add EXT4_DAX_FL in EXT4_FL_XFLAG_VISIBLE my test fails. I've been playing with the flags and looking at the code and I _thought_ the following patch would ensure that FS_XFLAG_DAX is the only one visible but for some reason FS_XFLAG_DAX can't be set with this patch. I still need the EXT4_FL_USER_VISIBLE mask altered... Which I believe would expose EXT4_DAX_FL directly as well. Jan, Ted? Any ideas? Or should we expose EXT4_DAX_FL and FS_XFLAG_DAX in ext4? Ira diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index fb7e66089a74..c3823f057755 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -423,7 +423,7 @@ struct flex_groups { #define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ -#define EXT4_FL_USER_VISIBLE 0x715BDFFF /* User visible flags */ +#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */ #define EXT4_FL_USER_MODIFIABLE 0x614BC0FF /* User modifiable flags */ /* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */ diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index b3c6e891185e..8bd0d3f9ca0b 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -744,8 +744,8 @@ static void ext4_fill_fsxattr(struct inode *inode, struct fsxattr *fa) { struct ext4_inode_info *ei = EXT4_I(inode); - simple_fill_fsxattr(fa, ext4_iflags_to_xflags(ei->i_flags & - EXT4_FL_USER_VISIBLE)); + simple_fill_fsxattr(fa, (ext4_iflags_to_xflags(ei->i_flags) & + EXT4_FL_XFLAG_VISIBLE)); if (ext4_has_feature_project(inode->i_sb)) fa->fsx_projid = from_kprojid(&init_user_ns, ei->i_projid);
On Thu, Apr 16, 2020 at 05:37:19PM -0700, Ira Weiny wrote: > On Thu, Apr 16, 2020 at 03:49:37PM -0700, Darrick J. Wong wrote: > > On Thu, Apr 16, 2020 at 03:33:27PM -0700, Ira Weiny wrote: > > > On Thu, Apr 16, 2020 at 09:25:04AM -0700, Darrick J. Wong wrote: > > > > On Mon, Apr 13, 2020 at 09:00:26PM -0700, ira.weiny@intel.com wrote: > > > > > From: Ira Weiny <ira.weiny@intel.com> > > > > > > > > > > Add a flag to preserve FS_XFLAG_DAX in the ext4 inode. > > > > > > > > > > Set the flag to be user visible and changeable. Set the flag to be > > > > > inherited. Allow applications to change the flag at any time. > > > > > > > > > > Finally, on regular files, flag the inode to not be cached to facilitate > > > > > changing S_DAX on the next creation of the inode. > > > > > > > > > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > > > > > --- > > > > > fs/ext4/ext4.h | 13 +++++++++---- > > > > > fs/ext4/ioctl.c | 21 ++++++++++++++++++++- > > > > > 2 files changed, 29 insertions(+), 5 deletions(-) > > > > > > > > > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > > > > > index 61b37a052052..434021fcec88 100644 > > > > > --- a/fs/ext4/ext4.h > > > > > +++ b/fs/ext4/ext4.h > > > > > @@ -415,13 +415,16 @@ struct flex_groups { > > > > > #define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */ > > > > > #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ > > > > > #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ > > > > > + > > > > > +#define EXT4_DAX_FL 0x00800000 /* Inode is DAX */ > > > > > > > > Sooo, fun fact about ext4 vs. the world-- > > > > > > > > The GETFLAGS/SETFLAGS ioctl, since it came from ext2, shares the same > > > > flag values as the ondisk inode flags in ext*. Therefore, each of these > > > > EXT4_[whatever]_FL values are supposed to have a FS_[whatever]_FL > > > > equivalent in include/uapi/linux/fs.h. > > > > > > Interesting... > > > > > > > > > > > (Note that the "[whatever]" is a straight translation since the same > > > > uapi header also defines the FS_XFLAG_[xfswhatever] flag values; ignore > > > > those.) > > > > > > > > Evidently, FS_NOCOW_FL already took 0x800000, but ext4.h was never > > > > updated to note that the value was taken. I think Ted might be inclined > > > > to reserve the ondisk inode bit just in case ext4 ever does support copy > > > > on write, though that's his call. :) > > > > > > Seems like I should change this... And I did not realize I was inherently > > > changing a bit definition which was exposed to other FS's... > > > > <nod> This whole thing is a mess, particularly now that we have two vfs > > ioctls to set per-fs inode attributes, both of which were inherited from > > other filesystems... :( > > > > Ok I've changed it. > > > > > > > > > > > Long story short - can you use 0x1000000 for this instead, and add the > > > > corresponding value to the uapi fs.h? I guess that also means that we > > > > can change FS_XFLAG_DAX (in the form of FS_DAX_FL in FSSETFLAGS) after > > > > that. > > > > > > :-/ > > > > > > Are there any potential users of FS_XFLAG_DAX now? > > > > Yes, it's in the userspace ABI so we can't get rid of it. > > > > (FWIW there are several flags that exist in both FS_XFLAG_* and FS_*_FL > > form.) > > > > > From what it looks like, changing FS_XFLAG_DAX to FS_DAX_FL would be pretty > > > straight forward. Just to be sure, looks like XFS converts the FS_[xxx]_FL to > > > FS_XFLAGS_[xxx] in xfs_merge_ioc_xflags()? But it does not look like all the > > > FS_[xxx]_FL flags are converted. Is is that XFS does not support those > > > options? Or is it depending on the VFS layer for some of them? > > > > XFS doesn't support most of the FS_*_FL flags. > > If FS_XFLAG_DAX needs to continue to be user visible I think we need to keep > that flag and we should not expose the EXT4_DAX_FL flag... > > I think that works for XFS. > > But for ext4 it looks like EXT4_FL_XFLAG_VISIBLE was intended to be used for > [GET|SET]XATTR where EXT4_FL_USER_VISIBLE was intended to for [GET|SET]FLAGS... > But if I don't add EXT4_DAX_FL in EXT4_FL_XFLAG_VISIBLE my test fails. > > I've been playing with the flags and looking at the code and I _thought_ the > following patch would ensure that FS_XFLAG_DAX is the only one visible but for > some reason FS_XFLAG_DAX can't be set with this patch. I still need the > EXT4_FL_USER_VISIBLE mask altered... Which I believe would expose EXT4_DAX_FL > directly as well. > > Jan, Ted? Any ideas? Or should we expose EXT4_DAX_FL and FS_XFLAG_DAX in > ext4? Both flags should be exposed through their respective ioctl interfaces in both filesystems. That way we don't have to add even more verbiage to the documentation to instruct userspace programmers on how to special case ext4 and XFS for the same piece of functionality. --D > Ira > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index fb7e66089a74..c3823f057755 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -423,7 +423,7 @@ struct flex_groups { > #define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */ > #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ > > -#define EXT4_FL_USER_VISIBLE 0x715BDFFF /* User visible flags */ > +#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */ > #define EXT4_FL_USER_MODIFIABLE 0x614BC0FF /* User modifiable flags */ > > /* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */ > diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c > index b3c6e891185e..8bd0d3f9ca0b 100644 > --- a/fs/ext4/ioctl.c > +++ b/fs/ext4/ioctl.c > @@ -744,8 +744,8 @@ static void ext4_fill_fsxattr(struct inode *inode, struct fsxattr *fa) > { > struct ext4_inode_info *ei = EXT4_I(inode); > > - simple_fill_fsxattr(fa, ext4_iflags_to_xflags(ei->i_flags & > - EXT4_FL_USER_VISIBLE)); > + simple_fill_fsxattr(fa, (ext4_iflags_to_xflags(ei->i_flags) & > + EXT4_FL_XFLAG_VISIBLE)); > > if (ext4_has_feature_project(inode->i_sb)) > fa->fsx_projid = from_kprojid(&init_user_ns, ei->i_projid);
On Thu, Apr 16, 2020 at 06:57:31PM -0700, Darrick J. Wong wrote: > On Thu, Apr 16, 2020 at 05:37:19PM -0700, Ira Weiny wrote: > > On Thu, Apr 16, 2020 at 03:49:37PM -0700, Darrick J. Wong wrote: > > > On Thu, Apr 16, 2020 at 03:33:27PM -0700, Ira Weiny wrote: > > > > On Thu, Apr 16, 2020 at 09:25:04AM -0700, Darrick J. Wong wrote: > > > > > On Mon, Apr 13, 2020 at 09:00:26PM -0700, ira.weiny@intel.com wrote: > > > > > > From: Ira Weiny <ira.weiny@intel.com> > > > > > > > > > > > > Add a flag to preserve FS_XFLAG_DAX in the ext4 inode. > > > > > > > > > > > > Set the flag to be user visible and changeable. Set the flag to be > > > > > > inherited. Allow applications to change the flag at any time. > > > > > > > > > > > > Finally, on regular files, flag the inode to not be cached to facilitate > > > > > > changing S_DAX on the next creation of the inode. > > > > > > > > > > > > Signed-off-by: Ira Weiny <ira.weiny@intel.com> > > > > > > --- > > > > > > fs/ext4/ext4.h | 13 +++++++++---- > > > > > > fs/ext4/ioctl.c | 21 ++++++++++++++++++++- > > > > > > 2 files changed, 29 insertions(+), 5 deletions(-) > > > > > > > > > > > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > > > > > > index 61b37a052052..434021fcec88 100644 > > > > > > --- a/fs/ext4/ext4.h > > > > > > +++ b/fs/ext4/ext4.h > > > > > > @@ -415,13 +415,16 @@ struct flex_groups { > > > > > > #define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */ > > > > > > #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ > > > > > > #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ > > > > > > + > > > > > > +#define EXT4_DAX_FL 0x00800000 /* Inode is DAX */ > > > > > > > > > > Sooo, fun fact about ext4 vs. the world-- > > > > > > > > > > The GETFLAGS/SETFLAGS ioctl, since it came from ext2, shares the same > > > > > flag values as the ondisk inode flags in ext*. Therefore, each of these > > > > > EXT4_[whatever]_FL values are supposed to have a FS_[whatever]_FL > > > > > equivalent in include/uapi/linux/fs.h. > > > > > > > > Interesting... > > > > > > > > > > > > > > (Note that the "[whatever]" is a straight translation since the same > > > > > uapi header also defines the FS_XFLAG_[xfswhatever] flag values; ignore > > > > > those.) > > > > > > > > > > Evidently, FS_NOCOW_FL already took 0x800000, but ext4.h was never > > > > > updated to note that the value was taken. I think Ted might be inclined > > > > > to reserve the ondisk inode bit just in case ext4 ever does support copy > > > > > on write, though that's his call. :) > > > > > > > > Seems like I should change this... And I did not realize I was inherently > > > > changing a bit definition which was exposed to other FS's... > > > > > > <nod> This whole thing is a mess, particularly now that we have two vfs > > > ioctls to set per-fs inode attributes, both of which were inherited from > > > other filesystems... :( > > > > > > > Ok I've changed it. > > > > > > > > > > > > > > > Long story short - can you use 0x1000000 for this instead, and add the > > > > > corresponding value to the uapi fs.h? I guess that also means that we > > > > > can change FS_XFLAG_DAX (in the form of FS_DAX_FL in FSSETFLAGS) after > > > > > that. > > > > > > > > :-/ > > > > > > > > Are there any potential users of FS_XFLAG_DAX now? > > > > > > Yes, it's in the userspace ABI so we can't get rid of it. > > > > > > (FWIW there are several flags that exist in both FS_XFLAG_* and FS_*_FL > > > form.) > > > > > > > From what it looks like, changing FS_XFLAG_DAX to FS_DAX_FL would be pretty > > > > straight forward. Just to be sure, looks like XFS converts the FS_[xxx]_FL to > > > > FS_XFLAGS_[xxx] in xfs_merge_ioc_xflags()? But it does not look like all the > > > > FS_[xxx]_FL flags are converted. Is is that XFS does not support those > > > > options? Or is it depending on the VFS layer for some of them? > > > > > > XFS doesn't support most of the FS_*_FL flags. > > > > If FS_XFLAG_DAX needs to continue to be user visible I think we need to keep > > that flag and we should not expose the EXT4_DAX_FL flag... > > > > I think that works for XFS. > > > > But for ext4 it looks like EXT4_FL_XFLAG_VISIBLE was intended to be used for > > [GET|SET]XATTR where EXT4_FL_USER_VISIBLE was intended to for [GET|SET]FLAGS... > > But if I don't add EXT4_DAX_FL in EXT4_FL_XFLAG_VISIBLE my test fails. > > > > I've been playing with the flags and looking at the code and I _thought_ the > > following patch would ensure that FS_XFLAG_DAX is the only one visible but for > > some reason FS_XFLAG_DAX can't be set with this patch. I still need the > > EXT4_FL_USER_VISIBLE mask altered... Which I believe would expose EXT4_DAX_FL > > directly as well. > > > > Jan, Ted? Any ideas? Or should we expose EXT4_DAX_FL and FS_XFLAG_DAX in > > ext4? > > Both flags should be exposed through their respective ioctl interfaces > in both filesystems. That way we don't have to add even more verbiage > to the documentation to instruct userspace programmers on how to special > case ext4 and XFS for the same piece of functionality. Wouldn't it be more confusing for the user to have 2 different flags which do the same thing? I would think that using FS_XFLAG_DAX _only_ (for both ext4 and xfs) would be easier without special cases? Ira
We still need to store an on-disk DAX flag for Ext4, and at that point it doesn't make sense not to expose it via the standard Ext4 chattr utility. So having EXT4_DAX_FL (== FS_DAX_FL) is no extra effort to add. Cheers, Andreas > On Apr 16, 2020, at 20:20, Ira Weiny <ira.weiny@intel.com> wrote: > > On Thu, Apr 16, 2020 at 06:57:31PM -0700, Darrick J. Wong wrote: >>> On Thu, Apr 16, 2020 at 05:37:19PM -0700, Ira Weiny wrote: >>> On Thu, Apr 16, 2020 at 03:49:37PM -0700, Darrick J. Wong wrote: >>>> On Thu, Apr 16, 2020 at 03:33:27PM -0700, Ira Weiny wrote: >>>>> On Thu, Apr 16, 2020 at 09:25:04AM -0700, Darrick J. Wong wrote: >>>>>> On Mon, Apr 13, 2020 at 09:00:26PM -0700, ira.weiny@intel.com wrote: >>>>>>> From: Ira Weiny <ira.weiny@intel.com> >>>>>>> >>>>>>> Add a flag to preserve FS_XFLAG_DAX in the ext4 inode. >>>>>>> >>>>>>> Set the flag to be user visible and changeable. Set the flag to be >>>>>>> inherited. Allow applications to change the flag at any time. >>>>>>> >>>>>>> Finally, on regular files, flag the inode to not be cached to facilitate >>>>>>> changing S_DAX on the next creation of the inode. >>>>>>> >>>>>>> Signed-off-by: Ira Weiny <ira.weiny@intel.com> >>>>>>> --- >>>>>>> fs/ext4/ext4.h | 13 +++++++++---- >>>>>>> fs/ext4/ioctl.c | 21 ++++++++++++++++++++- >>>>>>> 2 files changed, 29 insertions(+), 5 deletions(-) >>>>>>> >>>>>>> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h >>>>>>> index 61b37a052052..434021fcec88 100644 >>>>>>> --- a/fs/ext4/ext4.h >>>>>>> +++ b/fs/ext4/ext4.h >>>>>>> @@ -415,13 +415,16 @@ struct flex_groups { >>>>>>> #define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */ >>>>>>> #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ >>>>>>> #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ >>>>>>> + >>>>>>> +#define EXT4_DAX_FL 0x00800000 /* Inode is DAX */ >>>>>> >>>>>> Sooo, fun fact about ext4 vs. the world-- >>>>>> >>>>>> The GETFLAGS/SETFLAGS ioctl, since it came from ext2, shares the same >>>>>> flag values as the ondisk inode flags in ext*. Therefore, each of these >>>>>> EXT4_[whatever]_FL values are supposed to have a FS_[whatever]_FL >>>>>> equivalent in include/uapi/linux/fs.h. >>>>> >>>>> Interesting... >>>>> >>>>>> >>>>>> (Note that the "[whatever]" is a straight translation since the same >>>>>> uapi header also defines the FS_XFLAG_[xfswhatever] flag values; ignore >>>>>> those.) >>>>>> >>>>>> Evidently, FS_NOCOW_FL already took 0x800000, but ext4.h was never >>>>>> updated to note that the value was taken. I think Ted might be inclined >>>>>> to reserve the ondisk inode bit just in case ext4 ever does support copy >>>>>> on write, though that's his call. :) >>>>> >>>>> Seems like I should change this... And I did not realize I was inherently >>>>> changing a bit definition which was exposed to other FS's... >>>> >>>> <nod> This whole thing is a mess, particularly now that we have two vfs >>>> ioctls to set per-fs inode attributes, both of which were inherited from >>>> other filesystems... :( >>>> >>> >>> Ok I've changed it. >>> >>>> >>>>>> >>>>>> Long story short - can you use 0x1000000 for this instead, and add the >>>>>> corresponding value to the uapi fs.h? I guess that also means that we >>>>>> can change FS_XFLAG_DAX (in the form of FS_DAX_FL in FSSETFLAGS) after >>>>>> that. >>>>> >>>>> :-/ >>>>> >>>>> Are there any potential users of FS_XFLAG_DAX now? >>>> >>>> Yes, it's in the userspace ABI so we can't get rid of it. >>>> >>>> (FWIW there are several flags that exist in both FS_XFLAG_* and FS_*_FL >>>> form.) >>>> >>>>> From what it looks like, changing FS_XFLAG_DAX to FS_DAX_FL would be pretty >>>>> straight forward. Just to be sure, looks like XFS converts the FS_[xxx]_FL to >>>>> FS_XFLAGS_[xxx] in xfs_merge_ioc_xflags()? But it does not look like all the >>>>> FS_[xxx]_FL flags are converted. Is is that XFS does not support those >>>>> options? Or is it depending on the VFS layer for some of them? >>>> >>>> XFS doesn't support most of the FS_*_FL flags. >>> >>> If FS_XFLAG_DAX needs to continue to be user visible I think we need to keep >>> that flag and we should not expose the EXT4_DAX_FL flag... >>> >>> I think that works for XFS. >>> >>> But for ext4 it looks like EXT4_FL_XFLAG_VISIBLE was intended to be used for >>> [GET|SET]XATTR where EXT4_FL_USER_VISIBLE was intended to for [GET|SET]FLAGS... >>> But if I don't add EXT4_DAX_FL in EXT4_FL_XFLAG_VISIBLE my test fails. >>> >>> I've been playing with the flags and looking at the code and I _thought_ the >>> following patch would ensure that FS_XFLAG_DAX is the only one visible but for >>> some reason FS_XFLAG_DAX can't be set with this patch. I still need the >>> EXT4_FL_USER_VISIBLE mask altered... Which I believe would expose EXT4_DAX_FL >>> directly as well. >>> >>> Jan, Ted? Any ideas? Or should we expose EXT4_DAX_FL and FS_XFLAG_DAX in >>> ext4? >> >> Both flags should be exposed through their respective ioctl interfaces >> in both filesystems. That way we don't have to add even more verbiage >> to the documentation to instruct userspace programmers on how to special >> case ext4 and XFS for the same piece of functionality. > > Wouldn't it be more confusing for the user to have 2 different flags which do > the same thing? > > I would think that using FS_XFLAG_DAX _only_ (for both ext4 and xfs) would be > easier without special cases? > > Ira >
On Fri, Apr 17, 2020 at 12:43:39AM -0600, Andreas Dilger wrote: > We still need to store an on-disk DAX flag for Ext4, and at that point it > doesn't make sense not to expose it via the standard Ext4 chattr utility. > > So having EXT4_DAX_FL (== FS_DAX_FL) is no extra effort to add. I'll leave it exposed then. Thanks, Ira > > Cheers, Andreas > > > On Apr 16, 2020, at 20:20, Ira Weiny <ira.weiny@intel.com> wrote: > > > > On Thu, Apr 16, 2020 at 06:57:31PM -0700, Darrick J. Wong wrote: > >>> On Thu, Apr 16, 2020 at 05:37:19PM -0700, Ira Weiny wrote: > >>> On Thu, Apr 16, 2020 at 03:49:37PM -0700, Darrick J. Wong wrote: > >>>> On Thu, Apr 16, 2020 at 03:33:27PM -0700, Ira Weiny wrote: > >>>>> On Thu, Apr 16, 2020 at 09:25:04AM -0700, Darrick J. Wong wrote: > >>>>>> On Mon, Apr 13, 2020 at 09:00:26PM -0700, ira.weiny@intel.com wrote: > >>>>>>> From: Ira Weiny <ira.weiny@intel.com> > >>>>>>> > >>>>>>> Add a flag to preserve FS_XFLAG_DAX in the ext4 inode. > >>>>>>> > >>>>>>> Set the flag to be user visible and changeable. Set the flag to be > >>>>>>> inherited. Allow applications to change the flag at any time. > >>>>>>> > >>>>>>> Finally, on regular files, flag the inode to not be cached to facilitate > >>>>>>> changing S_DAX on the next creation of the inode. > >>>>>>> > >>>>>>> Signed-off-by: Ira Weiny <ira.weiny@intel.com> > >>>>>>> --- > >>>>>>> fs/ext4/ext4.h | 13 +++++++++---- > >>>>>>> fs/ext4/ioctl.c | 21 ++++++++++++++++++++- > >>>>>>> 2 files changed, 29 insertions(+), 5 deletions(-) > >>>>>>> > >>>>>>> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > >>>>>>> index 61b37a052052..434021fcec88 100644 > >>>>>>> --- a/fs/ext4/ext4.h > >>>>>>> +++ b/fs/ext4/ext4.h > >>>>>>> @@ -415,13 +415,16 @@ struct flex_groups { > >>>>>>> #define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */ > >>>>>>> #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ > >>>>>>> #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ > >>>>>>> + > >>>>>>> +#define EXT4_DAX_FL 0x00800000 /* Inode is DAX */ > >>>>>> > >>>>>> Sooo, fun fact about ext4 vs. the world-- > >>>>>> > >>>>>> The GETFLAGS/SETFLAGS ioctl, since it came from ext2, shares the same > >>>>>> flag values as the ondisk inode flags in ext*. Therefore, each of these > >>>>>> EXT4_[whatever]_FL values are supposed to have a FS_[whatever]_FL > >>>>>> equivalent in include/uapi/linux/fs.h. > >>>>> > >>>>> Interesting... > >>>>> > >>>>>> > >>>>>> (Note that the "[whatever]" is a straight translation since the same > >>>>>> uapi header also defines the FS_XFLAG_[xfswhatever] flag values; ignore > >>>>>> those.) > >>>>>> > >>>>>> Evidently, FS_NOCOW_FL already took 0x800000, but ext4.h was never > >>>>>> updated to note that the value was taken. I think Ted might be inclined > >>>>>> to reserve the ondisk inode bit just in case ext4 ever does support copy > >>>>>> on write, though that's his call. :) > >>>>> > >>>>> Seems like I should change this... And I did not realize I was inherently > >>>>> changing a bit definition which was exposed to other FS's... > >>>> > >>>> <nod> This whole thing is a mess, particularly now that we have two vfs > >>>> ioctls to set per-fs inode attributes, both of which were inherited from > >>>> other filesystems... :( > >>>> > >>> > >>> Ok I've changed it. > >>> > >>>> > >>>>>> > >>>>>> Long story short - can you use 0x1000000 for this instead, and add the > >>>>>> corresponding value to the uapi fs.h? I guess that also means that we > >>>>>> can change FS_XFLAG_DAX (in the form of FS_DAX_FL in FSSETFLAGS) after > >>>>>> that. > >>>>> > >>>>> :-/ > >>>>> > >>>>> Are there any potential users of FS_XFLAG_DAX now? > >>>> > >>>> Yes, it's in the userspace ABI so we can't get rid of it. > >>>> > >>>> (FWIW there are several flags that exist in both FS_XFLAG_* and FS_*_FL > >>>> form.) > >>>> > >>>>> From what it looks like, changing FS_XFLAG_DAX to FS_DAX_FL would be pretty > >>>>> straight forward. Just to be sure, looks like XFS converts the FS_[xxx]_FL to > >>>>> FS_XFLAGS_[xxx] in xfs_merge_ioc_xflags()? But it does not look like all the > >>>>> FS_[xxx]_FL flags are converted. Is is that XFS does not support those > >>>>> options? Or is it depending on the VFS layer for some of them? > >>>> > >>>> XFS doesn't support most of the FS_*_FL flags. > >>> > >>> If FS_XFLAG_DAX needs to continue to be user visible I think we need to keep > >>> that flag and we should not expose the EXT4_DAX_FL flag... > >>> > >>> I think that works for XFS. > >>> > >>> But for ext4 it looks like EXT4_FL_XFLAG_VISIBLE was intended to be used for > >>> [GET|SET]XATTR where EXT4_FL_USER_VISIBLE was intended to for [GET|SET]FLAGS... > >>> But if I don't add EXT4_DAX_FL in EXT4_FL_XFLAG_VISIBLE my test fails. > >>> > >>> I've been playing with the flags and looking at the code and I _thought_ the > >>> following patch would ensure that FS_XFLAG_DAX is the only one visible but for > >>> some reason FS_XFLAG_DAX can't be set with this patch. I still need the > >>> EXT4_FL_USER_VISIBLE mask altered... Which I believe would expose EXT4_DAX_FL > >>> directly as well. > >>> > >>> Jan, Ted? Any ideas? Or should we expose EXT4_DAX_FL and FS_XFLAG_DAX in > >>> ext4? > >> > >> Both flags should be exposed through their respective ioctl interfaces > >> in both filesystems. That way we don't have to add even more verbiage > >> to the documentation to instruct userspace programmers on how to special > >> case ext4 and XFS for the same piece of functionality. > > > > Wouldn't it be more confusing for the user to have 2 different flags which do > > the same thing? > > > > I would think that using FS_XFLAG_DAX _only_ (for both ext4 and xfs) would be > > easier without special cases? > > > > Ira > >
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 61b37a052052..434021fcec88 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -415,13 +415,16 @@ struct flex_groups { #define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */ #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ + +#define EXT4_DAX_FL 0x00800000 /* Inode is DAX */ + #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ -#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */ -#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */ +#define EXT4_FL_USER_VISIBLE 0x70DBDFFF /* User visible flags */ +#define EXT4_FL_USER_MODIFIABLE 0x60CBC0FF /* User modifiable flags */ /* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */ #define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \ @@ -429,14 +432,16 @@ struct flex_groups { EXT4_APPEND_FL | \ EXT4_NODUMP_FL | \ EXT4_NOATIME_FL | \ - EXT4_PROJINHERIT_FL) + EXT4_PROJINHERIT_FL | \ + EXT4_DAX_FL) /* Flags that should be inherited by new inodes from their parent. */ #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\ EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\ - EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL) + EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\ + EXT4_DAX_FL) /* Flags that are appropriate for regular files (all but dir-specific ones). */ #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\ diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index ee3401a32e79..ca07d5086f03 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -539,12 +539,15 @@ static inline __u32 ext4_iflags_to_xflags(unsigned long iflags) xflags |= FS_XFLAG_NOATIME; if (iflags & EXT4_PROJINHERIT_FL) xflags |= FS_XFLAG_PROJINHERIT; + if (iflags & EXT4_DAX_FL) + xflags |= FS_XFLAG_DAX; return xflags; } #define EXT4_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \ FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \ - FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT) + FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT | \ + FS_XFLAG_DAX) /* Transfer xflags flags to internal */ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) @@ -563,6 +566,8 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) iflags |= EXT4_NOATIME_FL; if (xflags & FS_XFLAG_PROJINHERIT) iflags |= EXT4_PROJINHERIT_FL; + if (xflags & FS_XFLAG_DAX) + iflags |= EXT4_DAX_FL; return iflags; } @@ -813,6 +818,17 @@ static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg) return error; } +static void ext4_dax_dontcache(struct inode *inode, unsigned int flags) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + + if (S_ISDIR(inode->i_mode)) + return; + + if ((ei->i_flags ^ flags) == EXT4_DAX_FL) + inode->i_state |= I_DONTCACHE; +} + long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1273,6 +1289,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return err; inode_lock(inode); + + ext4_dax_dontcache(inode, flags); + ext4_fill_fsxattr(inode, &old_fa); err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa); if (err)