diff mbox

possible different donor file naming in e4defrag

Message ID BAY179-W1E8676C4343760B6A8332FDCC0@phx.gbl
State New, archived
Headers show

Commit Message

TR Reardon Sept. 11, 2014, 10:49 p.m. UTC
> On Sep 11, 2014, at 1:48 PM, TR Reardon <thomas_reardon@hotmail.com> wrote:
>> Picking this back up. How would O_TMPFILE avoid races? It definitely avoids the unwanted mtime/atime update, but then the existing "<filename>.defrag" pseudo-lock file would no longer be available. How could you use O_TMPFILE and still avoid multiple defrag? If this isn't possible, then resetting the parent times on unlink(tmpfile), as you suggest, is the simplest way out of this.
>
> Looking at this the opposite way - what are the chances that there
> will be concurrent defrags on the same file? Basically no chance at
> all. So long as it doesn't explode (the kernel would need to protect
> against this anyway to avoid malicious apps), the worst case is that
> there will be some extra defragmentation done in a very rare case.
>
> Conversely, creating a temporary filename and then resetting the
> parent directory timestamp is extra work for every file defragmented,
> and is racy because e4defrag may "reset" the time to before the temp
> file was created, but clobber a legitimate timestamp update in the
> directory from some other concurrent update. That timestamp update
> is always going to be racy, even if e4defrag tries to be careful.
>
> Cheers, Andreas


Thanks, well described.

So a simple attempt with O_TMPFILE first, then revert to original behavior, like below?



 		 	   		  --
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

TR Reardon Sept. 11, 2014, 11:03 p.m. UTC | #1
(attaching same, to fix whitespace...I suppose I should configure a proper email client sometime)

+Reardon


----------------------------------------
> From: thomas_reardon@hotmail.com
> To: adilger@dilger.ca
> CC: darrick.wong@oracle.com; linux-ext4@vger.kernel.org
> Subject: RE: possible different donor file naming in e4defrag
> Date: Thu, 11 Sep 2014 18:49:07 -0400
>
>> On Sep 11, 2014, at 1:48 PM, TR Reardon <thomas_reardon@hotmail.com> wrote:
>>> Picking this back up. How would O_TMPFILE avoid races? It definitely avoids the unwanted mtime/atime update, but then the existing "<filename>.defrag" pseudo-lock file would no longer be available. How could you use O_TMPFILE and still avoid multiple defrag? If this isn't possible, then resetting the parent times on unlink(tmpfile), as you suggest, is the simplest way out of this.
>>
>> Looking at this the opposite way - what are the chances that there
>> will be concurrent defrags on the same file? Basically no chance at
>> all. So long as it doesn't explode (the kernel would need to protect
>> against this anyway to avoid malicious apps), the worst case is that
>> there will be some extra defragmentation done in a very rare case.
>>
>> Conversely, creating a temporary filename and then resetting the
>> parent directory timestamp is extra work for every file defragmented,
>> and is racy because e4defrag may "reset" the time to before the temp
>> file was created, but clobber a legitimate timestamp update in the
>> directory from some other concurrent update. That timestamp update
>> is always going to be racy, even if e4defrag tries to be careful.
>>
>> Cheers, Andreas
>
>
> Thanks, well described.
>
> So a simple attempt with O_TMPFILE first, then revert to original behavior, like below?
>
>
> diff --git a/misc/e4defrag.c b/misc/e4defrag.c
> index d0eac60..8001182 100644
> --- a/misc/e4defrag.c
> +++ b/misc/e4defrag.c
> @@ -40,6 +40,7 @@
> #include <sys/stat.h>
> #include <sys/statfs.h>
> #include <sys/vfs.h>
> +#include <libgen.h>
>
> /* A relatively new ioctl interface ... */
> #ifndef EXT4_IOC_MOVE_EXT
> @@ -1526,31 +1527,36 @@ static int file_defrag(const char *file, const struct stat64 *buf,
>
> /* Create donor inode */
> memset(tmp_inode_name, 0, PATH_MAX + 8);
> - sprintf(tmp_inode_name, "%.*s.defrag",
> - (int)strnlen(file, PATH_MAX), file);
> - donor_fd = open64(tmp_inode_name, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
> + /* Try O_TMPFILE first, to avoid changing directory mtime */
> + sprintf(tmp_inode_name, "%.*s", (int)strnlen(file, PATH_MAX), file);
> + donor_fd = open64(dirname(tmp_inode_name), O_TMPFILE | O_WRONLY | O_EXCL, S_IRUSR | S_IWUSR );
> if (donor_fd < 0) {
> - if (mode_flag & DETAIL) {
> - PRINT_FILE_NAME(file);
> - if (errno == EEXIST)
> - PRINT_ERR_MSG_WITH_ERRNO(
> - "File is being defraged by other program");
> - else
> - PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
> + sprintf(tmp_inode_name, "%.*s.defrag",
> + (int)strnlen(file, PATH_MAX), file);
> + donor_fd = open64(tmp_inode_name, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
> + if (donor_fd < 0) {
> + if (mode_flag & DETAIL) {
> + PRINT_FILE_NAME(file);
> + if (errno == EEXIST)
> + PRINT_ERR_MSG_WITH_ERRNO(
> + "File is being defraged by other program");
> + else
> + PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
> + }
> + goto out;
> }
> - goto out;
> - }
>
> - /* Unlink donor inode */
> - ret = unlink(tmp_inode_name);
> - if (ret < 0) {
> - if (mode_flag & DETAIL) {
> - PRINT_FILE_NAME(file);
> - PRINT_ERR_MSG_WITH_ERRNO("Failed to unlink");
> + /* Unlink donor inode */
> + ret = unlink(tmp_inode_name);
> + if (ret < 0) {
> + if (mode_flag & DETAIL) {
> + PRINT_FILE_NAME(file);
> + PRINT_ERR_MSG_WITH_ERRNO("Failed to unlink");
> + }
> + goto out;
> }
> - goto out;
> }
> -
> +
> /* Allocate space for donor inode */
> orig_group_tmp = orig_group_head;
> do {
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
Andreas Dilger Sept. 12, 2014, 7:41 p.m. UTC | #2
On Sep 11, 2014, at 5:03 PM, TR Reardon <thomas_reardon@hotmail.com> wrote:
> diff --git a/misc/e4defrag.c b/misc/e4defrag.c
> index d0eac60..8001182 100644
> --- a/misc/e4defrag.c
> +++ b/misc/e4defrag.c
> @@ -1526,31 +1527,36 @@ static int file_defrag(const char *file, const struct stat64 *buf,
>  
>  	/* Create donor inode */
>  	memset(tmp_inode_name, 0, PATH_MAX + 8);
> -	sprintf(tmp_inode_name, "%.*s.defrag",
> -				(int)strnlen(file, PATH_MAX), file);
> -	donor_fd = open64(tmp_inode_name, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
> +	/* Try O_TMPFILE first, to avoid changing directory mtime */
> +	sprintf(tmp_inode_name, "%.*s", (int)strnlen(file, PATH_MAX), file);
> +	donor_fd = open64(dirname(tmp_inode_name), O_TMPFILE | O_WRONLY | O_EXCL, S_IRUSR | S_IWUSR );

Lines need to be <= 80 columns.  Please run patch through checkpatch.pl.

Why is it opened O_WRONLY, but the permissions are S_IRUSR | S_IWUSR?
I agree it didn't make sense in the old code to have S_IRUSR either,
but I don't think this makes more sense.  If the file is write-only
(which is probably correct, unless e4defrag is doing some post-copy
checksum of the data) then S_IWUSR would be enough.

>  	if (donor_fd < 0) {
> -		if (mode_flag & DETAIL) {
> -			PRINT_FILE_NAME(file);
> -			if (errno == EEXIST)
> -				PRINT_ERR_MSG_WITH_ERRNO(
> -				"File is being defraged by other program");
> -			else
> -				PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
> +		sprintf(tmp_inode_name, "%.*s.defrag",
> +					(int)strnlen(file, PATH_MAX), file);
> +		donor_fd = open64(tmp_inode_name, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);

Wrap at 80 columns.

This has the same issue with O_WRONLY and S_IRUSR, though it at least
matches the old code.

> +		if (donor_fd < 0) {
> +			if (mode_flag & DETAIL) {
> +				PRINT_FILE_NAME(file);
> +				if (errno == EEXIST)
> +					PRINT_ERR_MSG_WITH_ERRNO(
> +					"File is being defraged by other program");
> +				else
> +					PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
> +			}
> +			goto out;
>  		}
> -		goto out;
> -	}
>  
> -	/* Unlink donor inode */
> -	ret = unlink(tmp_inode_name);
> -	if (ret < 0) {
> -		if (mode_flag & DETAIL) {
> -			PRINT_FILE_NAME(file);
> -			PRINT_ERR_MSG_WITH_ERRNO("Failed to unlink");
> +		/* Unlink donor inode */
> +		ret = unlink(tmp_inode_name);
> +		if (ret < 0) {
> +			if (mode_flag & DETAIL) {
> +				PRINT_FILE_NAME(file);
> +				PRINT_ERR_MSG_WITH_ERRNO("Failed to unlink");
> +			}
> +			goto out;
>  		}

Shouldn't it reset the timestamp in this case?

Cheers, Andreas

> -		goto out;
>  	}
> -
> +	
>  	/* Allocate space for donor inode */
>  	orig_group_tmp = orig_group_head;
>  	do {


> ----------------------------------------
>> From: thomas_reardon@hotmail.com
>> To: adilger@dilger.ca
>> CC: darrick.wong@oracle.com; linux-ext4@vger.kernel.org
>> Subject: RE: possible different donor file naming in e4defrag
>> Date: Thu, 11 Sep 2014 18:49:07 -0400
>> 
>>> On Sep 11, 2014, at 1:48 PM, TR Reardon <thomas_reardon@hotmail.com> wrote:
>>>> Picking this back up. How would O_TMPFILE avoid races? It definitely avoids the unwanted mtime/atime update, but then the existing "<filename>.defrag" pseudo-lock file would no longer be available. How could you use O_TMPFILE and still avoid multiple defrag? If this isn't possible, then resetting the parent times on unlink(tmpfile), as you suggest, is the simplest way out of this.
>>> 
>>> Looking at this the opposite way - what are the chances that there
>>> will be concurrent defrags on the same file? Basically no chance at
>>> all. So long as it doesn't explode (the kernel would need to protect
>>> against this anyway to avoid malicious apps), the worst case is that
>>> there will be some extra defragmentation done in a very rare case.
>>> 
>>> Conversely, creating a temporary filename and then resetting the
>>> parent directory timestamp is extra work for every file defragmented,
>>> and is racy because e4defrag may "reset" the time to before the temp
>>> file was created, but clobber a legitimate timestamp update in the
>>> directory from some other concurrent update. That timestamp update
>>> is always going to be racy, even if e4defrag tries to be careful.
>>> 
>>> Cheers, Andreas
>> 
>> 
>> Thanks, well described.
>> 
>> So a simple attempt with O_TMPFILE first, then revert to original behavior, like below?
>> 
>> 
>> diff --git a/misc/e4defrag.c b/misc/e4defrag.c
>> index d0eac60..8001182 100644
>> --- a/misc/e4defrag.c
>> +++ b/misc/e4defrag.c
>> @@ -40,6 +40,7 @@
>> #include <sys/stat.h>
>> #include <sys/statfs.h>
>> #include <sys/vfs.h>
>> +#include <libgen.h>
>> 
>> /* A relatively new ioctl interface ... */
>> #ifndef EXT4_IOC_MOVE_EXT
>> @@ -1526,31 +1527,36 @@ static int file_defrag(const char *file, const struct stat64 *buf,
>> 
>> /* Create donor inode */
>> memset(tmp_inode_name, 0, PATH_MAX + 8);
>> - sprintf(tmp_inode_name, "%.*s.defrag",
>> - (int)strnlen(file, PATH_MAX), file);
>> - donor_fd = open64(tmp_inode_name, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
>> + /* Try O_TMPFILE first, to avoid changing directory mtime */
>> + sprintf(tmp_inode_name, "%.*s", (int)strnlen(file, PATH_MAX), file);
>> + donor_fd = open64(dirname(tmp_inode_name), O_TMPFILE | O_WRONLY | O_EXCL, S_IRUSR | S_IWUSR );
>> if (donor_fd < 0) {
>> - if (mode_flag & DETAIL) {
>> - PRINT_FILE_NAME(file);
>> - if (errno == EEXIST)
>> - PRINT_ERR_MSG_WITH_ERRNO(
>> - "File is being defraged by other program");
>> - else
>> - PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
>> + sprintf(tmp_inode_name, "%.*s.defrag",
>> + (int)strnlen(file, PATH_MAX), file);
>> + donor_fd = open64(tmp_inode_name, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
>> + if (donor_fd < 0) {
>> + if (mode_flag & DETAIL) {
>> + PRINT_FILE_NAME(file);
>> + if (errno == EEXIST)
>> + PRINT_ERR_MSG_WITH_ERRNO(
>> + "File is being defraged by other program");
>> + else
>> + PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
>> + }
>> + goto out;
>> }
>> - goto out;
>> - }
>> 
>> - /* Unlink donor inode */
>> - ret = unlink(tmp_inode_name);
>> - if (ret < 0) {
>> - if (mode_flag & DETAIL) {
>> - PRINT_FILE_NAME(file);
>> - PRINT_ERR_MSG_WITH_ERRNO("Failed to unlink");
>> + /* Unlink donor inode */
>> + ret = unlink(tmp_inode_name);
>> + if (ret < 0) {
>> + if (mode_flag & DETAIL) {
>> + PRINT_FILE_NAME(file);
>> + PRINT_ERR_MSG_WITH_ERRNO("Failed to unlink");
>> + }
>> + goto out;
>> }
>> - goto out;
>> }
>> -
>> +
>> /* Allocate space for donor inode */
>> orig_group_tmp = orig_group_head;
>> do {
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
> 		 	   		  <DEFRAG_O_TMPFILE>


Cheers, Andreas
diff mbox

Patch

diff --git a/misc/e4defrag.c b/misc/e4defrag.c
index d0eac60..8001182 100644
--- a/misc/e4defrag.c
+++ b/misc/e4defrag.c
@@ -40,6 +40,7 @@ 
 #include <sys/stat.h>
 #include <sys/statfs.h>
 #include <sys/vfs.h>
+#include <libgen.h>
 
 /* A relatively new ioctl interface ... */
 #ifndef EXT4_IOC_MOVE_EXT
@@ -1526,31 +1527,36 @@  static int file_defrag(const char *file, const struct stat64 *buf,
 
 	/* Create donor inode */
 	memset(tmp_inode_name, 0, PATH_MAX + 8);
-	sprintf(tmp_inode_name, "%.*s.defrag",
-				(int)strnlen(file, PATH_MAX), file);
-	donor_fd = open64(tmp_inode_name, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
+	/* Try O_TMPFILE first, to avoid changing directory mtime */
+	sprintf(tmp_inode_name, "%.*s", (int)strnlen(file, PATH_MAX), file);
+	donor_fd = open64(dirname(tmp_inode_name), O_TMPFILE | O_WRONLY | O_EXCL, S_IRUSR | S_IWUSR );
 	if (donor_fd < 0) {
-		if (mode_flag & DETAIL) {
-			PRINT_FILE_NAME(file);
-			if (errno == EEXIST)
-				PRINT_ERR_MSG_WITH_ERRNO(
-				"File is being defraged by other program");
-			else
-				PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
+		sprintf(tmp_inode_name, "%.*s.defrag",
+					(int)strnlen(file, PATH_MAX), file);
+		donor_fd = open64(tmp_inode_name, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR);
+		if (donor_fd < 0) {
+			if (mode_flag & DETAIL) {
+				PRINT_FILE_NAME(file);
+				if (errno == EEXIST)
+					PRINT_ERR_MSG_WITH_ERRNO(
+					"File is being defraged by other program");
+				else
+					PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
+			}
+			goto out;
 		}
-		goto out;
-	}
 
-	/* Unlink donor inode */
-	ret = unlink(tmp_inode_name);
-	if (ret < 0) {
-		if (mode_flag & DETAIL) {
-			PRINT_FILE_NAME(file);
-			PRINT_ERR_MSG_WITH_ERRNO("Failed to unlink");
+		/* Unlink donor inode */
+		ret = unlink(tmp_inode_name);
+		if (ret < 0) {
+			if (mode_flag & DETAIL) {
+				PRINT_FILE_NAME(file);
+				PRINT_ERR_MSG_WITH_ERRNO("Failed to unlink");
+			}
+			goto out;
 		}
-		goto out;
 	}
-
+	
 	/* Allocate space for donor inode */
 	orig_group_tmp = orig_group_head;
 	do {