diff mbox series

[v3] ctime.3: EXAMPLES: Add example program

Message ID e9e31a505f59c75ae5f9549b67102a433b39b42c.1724370362.git.alx@kernel.org
State New
Headers show
Series [v3] ctime.3: EXAMPLES: Add example program | expand

Commit Message

Alejandro Colomar Aug. 22, 2024, 11:49 p.m. UTC
This example documents the corner cases of mktime(3), such as what
happens during DST transitions, and other jumps in the calendar.

Link: <https://www.redhat.com/en/blog/brief-history-mktime>
Reported-by: DJ Delorie <dj@redhat.com>
Cc: Paul Eggert <eggert@cs.ucla.edu>
Signed-off-by: Alejandro Colomar <alx@kernel.org>
---

Hi DJ, Paul!

Below is the rendered text.  I've tested this program with several
"weird" times, and it all makes sense.  Please review.

I call it v3, since it supersedes DJ's patches.

Have a lovely night!
Alex

EXAMPLES
     Passing an invalid time to mktime()  or  an  invalid  tm‐>tm_isdst
     value  yields  unspecified results.  Also, passing the value -1 in
     tm‐>tm_isdst will result in an  ambiguous  time  during  some  DST
     transitions, which will also yield an unspecified result.

     The program below uses a wrapper that allows detecting invalid and
     ambiguous times, with EINVAL and ENOTUNIQ, respectively.

     The following shell session shows sample runs of the program:

         $ export TZ=Europe/Madrid;
         $
         $ ./a.out 2024 08 23 00 17 53 -1;
         1724365073
         $ ./a.out 2024 08 23 00 17 53 0;
         a.out: mktime: Invalid argument
         1724368673
         $ ./a.out 2024 08 23 00 17 53 1;
         1724365073
         $
         $ ./a.out 2024 02 23 00 17 53 -1;
         1708643873
         $ ./a.out 2024 02 23 00 17 53 0;
         1708643873
         $ ./a.out 2024 02 23 00 17 53 1;
         a.out: mktime: Invalid argument
         1708640273
         $
         $ ./a.out 2024 03 26 02 17 53 -1;
         a.out: mktime: Invalid argument
         1679793473
         $
         $ ./a.out 2024 10 29 02 17 53 -1;
         a.out: mktime: Name not unique on network
         1698542273
         $ ./a.out 2024 10 29 02 17 53 0;
         1698542273
         $ ./a.out 2024 10 29 02 17 53 1;
         1698538673
         $
         $ ./a.out 2024 02 29 12 00 00 -1;
         a.out: mktime: Invalid argument
         1677668400

   Program source: mktime.c

     #include <err.h>
     #include <errno.h>
     #include <stdint.h>
     #include <stdio.h>
     #include <stdlib.h>
     #include <time.h>

     time_t my_mktime(struct tm *tp);

     int
     main(int argc, char *argv[])
     {
         char       **p;
         time_t     t;
         struct tm  tm;

         if (argc != 8) {
             fprintf(stderr, "Usage: %s yyyy mm dd HH MM SS isdst\n", argv[0]);
             exit(EXIT_FAILURE);
         }

         p = &argv[1];
         tm.tm_year  = atoi(*p++) - 1900;
         tm.tm_mon   = atoi(*p++) - 1;
         tm.tm_mday  = atoi(*p++);
         tm.tm_hour  = atoi(*p++);
         tm.tm_min   = atoi(*p++);
         tm.tm_sec   = atoi(*p++);
         tm.tm_isdst = atoi(*p++);

         errno = 0;
         t = my_mktime(&tm);
         if (errno == EOVERFLOW)
             err(EXIT_FAILURE, "mktime");
         if (errno == EINVAL || errno == ENOTUNIQ)
             warn("mktime");

         printf("%ju\n", (uintmax_t) t);
         exit(EXIT_SUCCESS);
     }

     time_t
     my_mktime(struct tm *tp)
     {
         int        e, isdst;
         time_t     t;
         struct tm  tm;

         e = errno;
         errno = 0;

         tm = *tp;
         isdst = tp->tm_isdst;

         t = mktime(tp);
         if (t == -1 && errno == EOVERFLOW)
             return -1;

         if (isdst == -1)
             tm.tm_isdst = tp->tm_isdst;

         if (   tm.tm_sec   != tp->tm_sec
             || tm.tm_min   != tp->tm_min
             || tm.tm_hour  != tp->tm_hour
             || tm.tm_mday  != tp->tm_mday
             || tm.tm_mon   != tp->tm_mon
             || tm.tm_year  != tp->tm_year
             || tm.tm_isdst != tp->tm_isdst)
         {
             errno = EINVAL;
             return t;
         }

         if (isdst != -1)
             goto out;

         tm = *tp;
         tm.tm_isdst = !tm.tm_isdst;

         if (mktime(&tm) == -1 && errno == EOVERFLOW)
             goto out;

         if (tm.tm_isdst != tp->tm_isdst) {
             errno = ENOTUNIQ;
             return t;
         }
     out:
         errno = e;
         return t;
     }


 man/man3/ctime.3 | 157 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 157 insertions(+)


base-commit: 0813c125d8bf754c40015aa1b31f23e0650584ac

Comments

Paul Eggert Aug. 23, 2024, 4:57 a.m. UTC | #1
On 2024-08-22 16:49, Alejandro Colomar wrote:
>           t = mktime(tp);
>           if (t == -1 && errno == EOVERFLOW)
>               return -1;

This isn't right, for the same reason similar code wasn't right earlier. 
A successful call to mktime can return -1 and set errno to whatever 
value it likes.

This is just the first problem I found with the code (and I found it 
quickly because I remembered the earlier problem). I would expect there 
to be others.

How about if we omit the sample code and make the minimal changes I 
suggested earlier?
Xi Ruoyao Aug. 23, 2024, 7:26 a.m. UTC | #2
On Fri, 2024-08-23 at 09:02 +0200, Alejandro Colomar wrote:
> Is mktime(3) allowed to return -1 and set EOVERFLOW on a successful
> call?
> 
> RETURN VALUE
>      The mktime() function shall return the specified  time  since  the
>      Epoch  encoded  as  a value of type time_t.  If the time since the
>      Epoch cannot be represented, the function shall return  the  value
>      (time_t)-1 and set errno to indicate the error.

For mktime the standard only says "return (time_t)-1."  It does not
mention errno at all.  And the standard also says:

   The value of errno may be set to nonzero by a library function call
   whether or not there is an error, provided the use of errno is not
   documented in the description of the function in this document.

> Then I think the API is completely broken.  How should we check for
> errors after a mktime(3) call?

Maybe, special case if tm contains Dec 31 1969 23:59:59 UTC...  But it's
just stupid.

> If this is so, let me file a glibc bug requesting a fix of the API,
> adding a promise that on success, errno will remain unset.

It's a bug in the standard, not glibc.  And the standard has deprecated
it anyway.

https://www.open-std.org/JTC1/SC22/WG14/www/docs/n2566.pdf

> > How about if we omit the sample code and make the minimal changes I
> > suggested earlier?
> 
> Because I'm being very careful writing that code, and still I'm having
> trouble doing that, I think we must provide some example of a correct
> call, to prevent many other programmers from doing it wrong.

So IMO you should just say "the interface is deprecated, do not use it
in any new code."
Vincent Lefevre Aug. 23, 2024, 7:57 a.m. UTC | #3
On 2024-08-23 15:26:04 +0800, Xi Ruoyao wrote:
> On Fri, 2024-08-23 at 09:02 +0200, Alejandro Colomar wrote:
> > Is mktime(3) allowed to return -1 and set EOVERFLOW on a successful
> > call?
> > 
> > RETURN VALUE
> >      The mktime() function shall return the specified  time  since  the
> >      Epoch  encoded  as  a value of type time_t.  If the time since the
> >      Epoch cannot be represented, the function shall return  the  value
> >      (time_t)-1 and set errno to indicate the error.
> 
> For mktime the standard only says "return (time_t)-1."  It does not
> mention errno at all.  And the standard also says:
> 
>    The value of errno may be set to nonzero by a library function call
>    whether or not there is an error, provided the use of errno is not
>    documented in the description of the function in this document.
> 
> > Then I think the API is completely broken.  How should we check for
> > errors after a mktime(3) call?
> 
> Maybe, special case if tm contains Dec 31 1969 23:59:59 UTC...  But it's
> just stupid.
> 
> > If this is so, let me file a glibc bug requesting a fix of the API,
> > adding a promise that on success, errno will remain unset.
> 
> It's a bug in the standard, not glibc.  And the standard has deprecated
> it anyway.
> 
> https://www.open-std.org/JTC1/SC22/WG14/www/docs/n2566.pdf

I can see only asctime and ctime mentioned there, not mktime.
So mktime isn't deprecated, is it?
Alejandro Colomar Aug. 23, 2024, 12:28 p.m. UTC | #4
Hi Xi,

On Fri, Aug 23, 2024 at 03:26:04PM GMT, Xi Ruoyao wrote:
> On Fri, 2024-08-23 at 09:02 +0200, Alejandro Colomar wrote:
> > Is mktime(3) allowed to return -1 and set EOVERFLOW on a successful
> > call?
> > 
> > RETURN VALUE
> >      The mktime() function shall return the specified  time  since  the
> >      Epoch  encoded  as  a value of type time_t.  If the time since the
> >      Epoch cannot be represented, the function shall return  the  value
> >      (time_t)-1 and set errno to indicate the error.
> 
> For mktime the standard only says "return (time_t)-1."  It does not
> mention errno at all.  And the standard also says:
> 
>    The value of errno may be set to nonzero by a library function call
>    whether or not there is an error, provided the use of errno is not
>    documented in the description of the function in this document.
> 
> > Then I think the API is completely broken.  How should we check for
> > errors after a mktime(3) call?
> 
> Maybe, special case if tm contains Dec 31 1969 23:59:59 UTC...  But it's
> just stupid.
> 
> > If this is so, let me file a glibc bug requesting a fix of the API,
> > adding a promise that on success, errno will remain unset.
> 
> It's a bug in the standard, not glibc.  And the standard has deprecated
> it anyway.
> 
> https://www.open-std.org/JTC1/SC22/WG14/www/docs/n2566.pdf

The standard has only codified existing practice regarding mktime(3).
mktime(3) was already standardized in POSIX.1-1988 before ANSI C.  And
considering that POSIX usually just codifies existing practice without
inventions, the blame is on the implementations.

So, yes, all implementations and the standard seem sub-par.

Although, I've been checking ISO C after your comments, and I found an
interesting specification in ISO C that is not present in POSIX, and is
also missing in the Linux manual page:

tm_wday is guaranteed to be left unmodified on a failed call.  This
provides a way to determine if the call failed.  Indeed, this is the
only way to determine if the call failed:

	tm.tm_wday = INT_MAX;
	mktime(&tm);
	if (tm.tm_wday == INT_MAX)
		err(1, "mktime");

I'll add this to the manual page.  This makes mktime(3) not broken.
Although it is still quite obscure.  Every call that I've seen in a
search either don't check for errors, or check them incorrectly.  A
better API is definitely possible, and it would even keep backwards
compatibility.

> 
> > > How about if we omit the sample code and make the minimal changes I
> > > suggested earlier?
> > 
> > Because I'm being very careful writing that code, and still I'm having
> > trouble doing that, I think we must provide some example of a correct
> > call, to prevent many other programmers from doing it wrong.
> 
> So IMO you should just say "the interface is deprecated, do not use it
> in any new code."

mktime(3) is not deprecated, as Vincent pointed out.

Have a lovely day!
Alex
Xi Ruoyao Aug. 23, 2024, 12:28 p.m. UTC | #5
On Fri, 2024-08-23 at 14:28 +0200, Alejandro Colomar wrote:
> > > Because I'm being very careful writing that code, and still I'm having
> > > trouble doing that, I think we must provide some example of a correct
> > > call, to prevent many other programmers from doing it wrong.
> > 
> > So IMO you should just say "the interface is deprecated, do not use it
> > in any new code."
> 
> mktime(3) is not deprecated, as Vincent pointed out.

I must be lacking sleeping time recent days :(.
Vincent Lefevre Aug. 23, 2024, 12:53 p.m. UTC | #6
On 2024-08-23 14:28:13 +0200, Alejandro Colomar wrote:
[about mktime]
> tm_wday is guaranteed to be left unmodified on a failed call.

Where did you see that?

I cannot see any guarantee in case of a failed call, so that I would
say that tm_wday could have been modified, e.g. if the values are
set before checking whether the calendar time can be represented.

> This provides a way to determine if the call failed.

The example in C17 does not use the above claim that "tm_wday is
guaranteed to be left unmodified on a failed call" to determine
whether the call failed. Instead, it uses

  if (mktime(&time_str) == (time_t)(-1))

which is not 100% correct, since -1 can be a valid value (as already
noticed).

> Indeed, this is the only way to determine if the call failed:
> 
> 	tm.tm_wday = INT_MAX;
> 	mktime(&tm);
> 	if (tm.tm_wday == INT_MAX)
> 		err(1, "mktime");

Because of my above remark, I think that a mktime(&tm) == (time_t)(-1)
test is needed *in addition to* the tm.tm_wday == INT_MAX test.
Alejandro Colomar Aug. 23, 2024, 1:12 p.m. UTC | #7
Hi Vincent,

On Fri, Aug 23, 2024 at 02:53:13PM GMT, Vincent Lefevre wrote:
> On 2024-08-23 14:28:13 +0200, Alejandro Colomar wrote:
> [about mktime]
> > tm_wday is guaranteed to be left unmodified on a failed call.
> 
> Where did you see that?

Hmmm, it seems a novelty of C23.  I don't find it in C11.

Here's the text in C23:

<https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3220.pdf#subsubsection.7.29.2.3>

	If the calendar time cannot be represented in the time_t
	encoding used for the return value or the value to be returned
	in the tm_year component of the structure pointed to by timeptr
	cannot be represented as an int, the function returns the value
	(time_t)(-1) and does not change the value of the tm_wday
	component of the structure.

And the example code has also been modified in C23 to use this feature:

	static const char *const wday[] = {
		"Sunday", "Monday", "Tuesday", "Wednesday",
		"Thursday", "Friday", "Saturday", "-unknown-"
	};

	...

	time_str.tm_wday = 7;
	mktime(&time_str);
	printf("%s\n", wday[time_str.tm_wday]);

> I cannot see any guarantee in case of a failed call, so that I would
> say that tm_wday could have been modified, e.g. if the values are
> set before checking whether the calendar time can be represented.

Looking at the WG14 document logs, it seems it was added in n3147:
<https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3147.txt>

I've CCed Robert (author of that paper) and Jens (proposed other changes
to mktime(3) recently), which may know what was discussed there.  I
assume they checked that this is true in all existing implementations,
but that's just my assumption, so maybe they can say something.

> > This provides a way to determine if the call failed.
> 
> The example in C17 does not use the above claim that "tm_wday is
> guaranteed to be left unmodified on a failed call" to determine
> whether the call failed. Instead, it uses
> 
>   if (mktime(&time_str) == (time_t)(-1))
> 
> which is not 100% correct, since -1 can be a valid value (as already
> noticed).
> 
> > Indeed, this is the only way to determine if the call failed:
> > 
> > 	tm.tm_wday = INT_MAX;
> > 	mktime(&tm);
> > 	if (tm.tm_wday == INT_MAX)
> > 		err(1, "mktime");
> 
> Because of my above remark, I think that a mktime(&tm) == (time_t)(-1)
> test is needed *in addition to* the tm.tm_wday == INT_MAX test.

Have a lovely day!
Alex
Vincent Lefevre Aug. 23, 2024, 1:54 p.m. UTC | #8
On 2024-08-23 15:12:16 +0200, Alejandro Colomar wrote:
> Looking at the WG14 document logs, it seems it was added in n3147:
> <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3147.txt>

Thanks for the reference. Additional details can be found
in CD2 ballot at

  https://open-std.org/JTC1/SC22/WG14/www/docs/n3148.doc

which references the POSIX bug

  https://austingroupbugs.net/view.php?id=1614

where I can see at the end:

  On page 1332 line 44348 section mktime(), change:

    if (mktime(&time_str) == -1)

  to:

    time_str.tm_wday = -1;
    if (mktime(&time_str) == (time_t)-1 && time_str.tm_wday == -1)

This is the test I suggested: a check that mktime() returns -1,
and since it can be a valid value, a second test on tm_wday
(where the input, which is ignored, has an invalid value such
as -1 here, or INT_MAX in your case; note that -1 may be more
efficient with some processors, and shorter to write).
Brian Inglis Aug. 23, 2024, 2:04 p.m. UTC | #9
AFAICS from 9899 1998 Draft to 2023, the *mktime* wording has been essentially:

"The original values of the tm_wday and tm_yday components of the structure are 
ignored, and the original values of the other components are not restricted to 
the ranges indicated above. On successful completion, the values of the tm_wday 
and tm_yday components of the structure are set appropriately,"

so the recommendation has been to do something like (pointless example):

	time_t tt = time(&tt);
	struct tm *tm = localtime(&tt);
	tm->tm_wday = tm->tm_yday = -1;
	if ((tt = mktime(tm)) == -1 && tm->tm_wday == -1 && tm->tm_yday == -1)
		error(...);
Alejandro Colomar Aug. 23, 2024, 2:18 p.m. UTC | #10
Hi Vincent,

On Fri, Aug 23, 2024 at 03:54:49PM GMT, Vincent Lefevre wrote:
> On 2024-08-23 15:12:16 +0200, Alejandro Colomar wrote:
> > Looking at the WG14 document logs, it seems it was added in n3147:
> > <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3147.txt>
> 
> Thanks for the reference. Additional details can be found
> in CD2 ballot at
> 
>   https://open-std.org/JTC1/SC22/WG14/www/docs/n3148.doc

It's interesting that WG14 claims that they're not aware of any existing
implementations that would modify tm_wday on failure.

Although it's weird, because WG14 attributes that claim to the Austin
Group, and

> 
> which references the POSIX bug
> 
>   https://austingroupbugs.net/view.php?id=1614

I don't see any discussion about tm_wday in that Austin Group bug.  :|
Maybe it happened in a mailing list or elsewhere.  Anyway, let's trust
that claim.  (If any implementation does not conform, at least it should
be feasible to fix that implementation to conform.)

> where I can see at the end:
> 
>   On page 1332 line 44348 section mktime(), change:
> 
>     if (mktime(&time_str) == -1)
> 
>   to:
> 
>     time_str.tm_wday = -1;
>     if (mktime(&time_str) == (time_t)-1 && time_str.tm_wday == -1)

Yep, they do that change, although I haven't been able to find a
discussion that justifies that change.  :|  (Anyway, I think the change
is good.  I'm only perplex not finding the justification.)

> This is the test I suggested: a check that mktime() returns -1,

I think that test suggested by POSIX is bogus (redundant).  If mktime(3)
has failed, tm_wday is unchanged.  If it has succeeded, tm_wday must be
changed.  Thus, the return value is meaningless for the purpose of
determining if it has failed.

> and since it can be a valid value, a second test on tm_wday
> (where the input, which is ignored, has an invalid value such
> as -1 here, or INT_MAX in your case; note that -1 may be more
> efficient with some processors, and shorter to write).

I didn't use -1 because I thought some weird weeks might contain 8 days
(for some of those weird timezone adjustments), and that that might
cause wday -1 to actually exist.  Thus I chose INT_MIN, which is far far
away from valid values, to represent an error.  However, I assume that
they were careful enough to make sure that such weeks do not exist, so
yes, -1 will be better.  (And if it's not, we'll blame Austin.)  :)

Cheers,
Alex
Alejandro Colomar Aug. 23, 2024, 2:25 p.m. UTC | #11
Hi Brian,

On Fri, Aug 23, 2024 at 08:04:20AM GMT, Brian Inglis wrote:
> AFAICS from 9899 1998 Draft to 2023, the *mktime* wording has been essentially:

I find the above confusing.  What is 9899 1998?  The draft is for
ISO/IEC 9899:2024.  There's no 1998 in the name.

> 
> "The original values of the tm_wday and tm_yday components of the structure
> are ignored, and the original values of the other components are not
> restricted to the ranges indicated above. On successful completion, the
> values of the tm_wday and tm_yday components of the structure are set
> appropriately,"

The text you've quoted is there since ISO C89.
<https://port70.net/~nsz/c/c89/c89-draft.html#4.12.2.3>

And that is not guarantee enough.  It says that wday and yday are
ignored (for the purposes of determining the return value).  And it says
that on success it modifies them.  But that text is silent about what
happens on error.

It is C23 in 7.19.2.3p3 which provides a novel guarantee, that those
fields won't be modified on a failed call.

> 
> so the recommendation has been to do something like (pointless example):
> 
> 	time_t tt = time(&tt);
> 	struct tm *tm = localtime(&tt);
> 	tm->tm_wday = tm->tm_yday = -1;
> 	if ((tt = mktime(tm)) == -1 && tm->tm_wday == -1 && tm->tm_yday == -1)

This conditional, just like the one in the POSIX 2024 standard, is
redundant.  Reading tm_wday is enough for determining a failure.

Otherwise, what would you expect if tm_yday is 4 but tm_wday is -1?
Half an error?  :)

Cheers,
Alex
Vincent Lefevre Aug. 23, 2024, 3:26 p.m. UTC | #12
On 2024-08-23 16:18:21 +0200, Alejandro Colomar wrote:
> Hi Vincent,
> 
> On Fri, Aug 23, 2024 at 03:54:49PM GMT, Vincent Lefevre wrote:
> > On 2024-08-23 15:12:16 +0200, Alejandro Colomar wrote:
> > > Looking at the WG14 document logs, it seems it was added in n3147:
> > > <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3147.txt>
> > 
> > Thanks for the reference. Additional details can be found
> > in CD2 ballot at
> > 
> >   https://open-std.org/JTC1/SC22/WG14/www/docs/n3148.doc
> 
> It's interesting that WG14 claims that they're not aware of any existing
> implementations that would modify tm_wday on failure.

AFAIK, this is not a claim from WG14, but from the one who submitted
the GB-159 comment. The claim is

  There is some existing practice where application code sets tm_wday
  to an out-of-range sentinel value and checks whether it was changed
  by mktime, and we are not aware of any implementation where this
  does not work.

and this is rather vague: we do not know whether this existing practice
is common and which implementations have been checked.

> Although it's weird, because WG14 attributes that claim to the Austin
> Group, and

The comment attributes the issues to the Austin Group, but perhaps
not all the details.

> > which references the POSIX bug
> > 
> >   https://austingroupbugs.net/view.php?id=1614
> 
> I don't see any discussion about tm_wday in that Austin Group bug.  :|
> Maybe it happened in a mailing list or elsewhere.

Yes, perhaps in the austin-group-l mailing-list.

> (If any implementation does not conform, at least it should
> be feasible to fix that implementation to conform.)

That's something new in the future C23 standard. So I don't think
that older implementations (stable releases) would change.

> > This is the test I suggested: a check that mktime() returns -1,
> 
> I think that test suggested by POSIX is bogus (redundant).  If mktime(3)
> has failed, tm_wday is unchanged.  If it has succeeded, tm_wday must be
> changed.  Thus, the return value is meaningless for the purpose of
> determining if it has failed.

Yes, after some thoughts, I agree.

However, it should be said that with pre-C23 implementations,
it is not guaranteed to detect failures.

Said otherwise, the change from

  if (mktime(&time_str) == -1)

to

  if (time_str.tm_wday == -1)

will avoid spurious failures (the case where -1 is a valid calendar
value), but it might make some failures be undetected, though no
implementations with such an issue are known.

> > and since it can be a valid value, a second test on tm_wday
> > (where the input, which is ignored, has an invalid value such
> > as -1 here, or INT_MAX in your case; note that -1 may be more
> > efficient with some processors, and shorter to write).
> 
> I didn't use -1 because I thought some weird weeks might contain 8 days
> (for some of those weird timezone adjustments), and that that might
> cause wday -1 to actually exist.

This is invalid and could cause crashes in programs, or worse.
In C17:

  int tm_wday; // days since Sunday -- [0, 6]
Alejandro Colomar Aug. 23, 2024, 5:48 p.m. UTC | #13
[CC += austin-group-l@opengroup.org]

(Although I'm not sure if that mailing list is open.  maybe it rejects
 mail from this account.  Please anyone bounce it if you find that it
 doesn't reach the list.)

[CC += Robert, Geoff, Andrew]

Hi Vincent, Robert, Geoff, Andrew,

On Fri, Aug 23, 2024 at 05:26:17PM GMT, Vincent Lefevre wrote:
> On 2024-08-23 16:18:21 +0200, Alejandro Colomar wrote:
> > Hi Vincent,
> > 
> > On Fri, Aug 23, 2024 at 03:54:49PM GMT, Vincent Lefevre wrote:
> > > On 2024-08-23 15:12:16 +0200, Alejandro Colomar wrote:
> > > > Looking at the WG14 document logs, it seems it was added in n3147:
> > > > <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3147.txt>
> > > 
> > > Thanks for the reference. Additional details can be found
> > > in CD2 ballot at
> > > 
> > >   https://open-std.org/JTC1/SC22/WG14/www/docs/n3148.doc
> > 
> > It's interesting that WG14 claims that they're not aware of any existing
> > implementations that would modify tm_wday on failure.
> 
> AFAIK, this is not a claim from WG14, but from the one who submitted
> the GB-159 comment. The claim is

True.

>   There is some existing practice where application code sets tm_wday
>   to an out-of-range sentinel value and checks whether it was changed
>   by mktime, and we are not aware of any implementation where this
>   does not work.
> 
> and this is rather vague: we do not know whether this existing practice
> is common

From what I can find with Debian's <https://codesearch.debian.net>,
there's more than one.  Still negligible compared to the amount of
bogus checks comparing the return value with -1, but a decent number.
I'm not sure we can call that common, though.

	boost
	python
	ceph

And one project has the redundant code suggested by POSIX:

	pypy

> and which implementations have been checked.

Robert, Geoff, Andrew, can you please clarify?

> > Although it's weird, because WG14 attributes that claim to the Austin
> > Group, and
> 
> The comment attributes the issues to the Austin Group, but perhaps
> not all the details.
> 
> > > which references the POSIX bug
> > > 
> > >   https://austingroupbugs.net/view.php?id=1614
> > 
> > I don't see any discussion about tm_wday in that Austin Group bug.  :|
> > Maybe it happened in a mailing list or elsewhere.
> 
> Yes, perhaps in the austin-group-l mailing-list.

Robert, Geoff, Andrew, can you please clarify where is the discussion
that led to the following change?:

On page 1331 line 44327 section mktime(), change:

    The mktime() function shall return the specified time since the Epoch encoded as a value of type time_t. If the time since the Epoch cannot be represented, the function shall return the value (time_t)-1 [CX]and set errno to indicate the error[/CX].

to:

    The mktime() function shall return the calculated time since the Epoch encoded as a value of type time_t. If the time since the Epoch cannot be represented as a time_t [CX]or the value to be returned in the tm_year member of the structure pointed to by timeptr cannot be represented as an int[/CX], the function shall return the value (time_t)-1 [CX]and set errno to [EOVERFLOW], and shall not change the value of the tm_wday component of the structure.[/CX]

    [CX]Since (time_t)-1 is a valid return value for a successful call to mktime(), an application wishing to check for error situations should set tm_wday to a value less than 0 or greater than 6 before calling mktime(). On return, if tm_wday has not changed an error has occurred.[/CX]

We haven't found any mention at all in the Austin bug #1614, other than
the change itself, which is weird.  There must have been something
elsewhere.  (I do like the change, but I'd like to understand what
research was done.)

> > (If any implementation does not conform, at least it should
> > be feasible to fix that implementation to conform.)
> 
> That's something new in the future C23 standard.

(And in POSIX.1-2024.)

> So I don't think
> that older implementations (stable releases) would change.

You could argue with them that there's no way to test for errors other
than this method, so if they don't fix their implementations, they are
effectively broken.  Or you may need to assume that under older
standards, it was impossible to check for mktime(3) errors.  Do not use
implementations that conform to broken standards.  :)

> > > This is the test I suggested: a check that mktime() returns -1,
> > 
> > I think that test suggested by POSIX is bogus (redundant).  If mktime(3)
> > has failed, tm_wday is unchanged.  If it has succeeded, tm_wday must be
> > changed.  Thus, the return value is meaningless for the purpose of
> > determining if it has failed.
> 
> Yes, after some thoughts, I agree.
> 
> However, it should be said that with pre-C23 implementations,
> it is not guaranteed to detect failures.

Under a pre-C23 (and pre-POSIX.1-2024) implementation, there was no
method that would guarantee detecting failures.  Or rather, there was no
method that would be free of false positives.  You could always test the
return for -1, and reject some valid dates.  Choose your poison.

> Said otherwise, the change from
> 
>   if (mktime(&time_str) == -1)
> 
> to
> 
>   if (time_str.tm_wday == -1)
> 
> will avoid spurious failures (the case where -1 is a valid calendar
> value), but it might make some failures be undetected, though no
> implementations with such an issue are known.
> 
> > > and since it can be a valid value, a second test on tm_wday
> > > (where the input, which is ignored, has an invalid value such
> > > as -1 here, or INT_MAX in your case; note that -1 may be more
> > > efficient with some processors, and shorter to write).
> > 
> > I didn't use -1 because I thought some weird weeks might contain 8 days
> > (for some of those weird timezone adjustments), and that that might
> > cause wday -1 to actually exist.
> 
> This is invalid and could cause crashes in programs, or worse.
> In C17:
> 
>   int tm_wday; // days since Sunday -- [0, 6]

Hmmm, nice.  I guess if a weekday was ever added, it would be given a
name, and thus it would probably either repeat one of the sevenn, or be
an eigth day, but not a -1th day.  (Even if just because politicians
don't think of negative week days.)  So I guess it's safe to assume that
tm_wday -1 will never exist.

Good.

Have a lovely day!
Alex
Brian Inglis Aug. 23, 2024, 6:31 p.m. UTC | #14
On 2024-08-23 08:25, Alejandro Colomar wrote:
> Hi Brian,
> 
> On Fri, Aug 23, 2024 at 08:04:20AM GMT, Brian Inglis wrote:
>> AFAICS from 9899 1998 Draft to 2023, the *mktime* wording has been essentially:
> 
> I find the above confusing.  What is 9899 1998?  The draft is for
> ISO/IEC 9899:2024.  There's no 1998 in the name.

Sorry for the typo - from the original *1988* Draft from ANSI X3J11 88-090 
1988-05-13 of ANSI/ISO/IEC 9899:1990 through 2023.

>> "The original values of the tm_wday and tm_yday components of the structure
>> are ignored, and the original values of the other components are not
>> restricted to the ranges indicated above. On successful completion, the
>> values of the tm_wday and tm_yday components of the structure are set
>> appropriately,"
> 
> The text you've quoted is there since ISO C89.
> <https://port70.net/~nsz/c/c89/c89-draft.html#4.12.2.3>
> 
> And that is not guarantee enough.  It says that wday and yday are
> ignored (for the purposes of determining the return value).  And it says
> that on success it modifies them.  But that text is silent about what
> happens on error.

It states they are set on successful completion, and implied left alone if not.

> It is C23 in 7.19.2.3p3 which provides a novel guarantee, that those
              ^^^^^^^^^^
              7.29.2.3.p3

See "WD" https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3220.pdf#page=415

> fields won't be modified on a failed call.
> 
>>
>> so the recommendation has been to do something like (pointless example):
>>
>> 	time_t tt = time(&tt);
>> 	struct tm *tm = localtime(&tt);
>> 	tm->tm_wday = tm->tm_yday = -1;
>> 	if ((tt = mktime(tm)) == -1 && tm->tm_wday == -1 && tm->tm_yday == -1)
> 
> This conditional, just like the one in the POSIX 2024 standard, is
> redundant.  Reading tm_wday is enough for determining a failure.
> 
> Otherwise, what would you expect if tm_yday is 4 but tm_wday is -1? Half an error? :)

Checking both are still untouched guarantees an error!
If one is set, it's a QoI bug, not an error ;^>
Paul Eggert Aug. 23, 2024, 7:08 p.m. UTC | #15
On 2024-08-23 10:48, Alejandro Colomar wrote:
> Robert, Geoff, Andrew, can you please clarify where is the discussion
> that led to the following change?:

Why does it matter? The tm_wday idea has worked everywhere for decades 
and is now standardized.

For what it's worth, GNU Emacs has been using the tm_wday idea since 
2018, when I made the following change:

https://git.savannah.gnu.org/cgit/emacs.git/commit/?id=b4eb908f858284a7962851fd99c94598f76afa6f

and many GNU tools also use the idea, because I made a similar change to 
Gnulib too:

https://git.savannah.gnu.org/cgit/gnulib.git/commit/?id=6ccfbb4ce5d4fa79f7afb48f3648f2e0401523c3
Hans Åberg Aug. 23, 2024, 8:09 p.m. UTC | #16
> On 23 Aug 2024, at 21:08, Paul Eggert via austin-group-l at The Open Group <austin-group-l@opengroup.org> wrote:
> 
> On 2024-08-23 10:48, Alejandro Colomar wrote:
>> Robert, Geoff, Andrew, can you please clarify where is the discussion
>> that led to the following change?:
> 
> Why does it matter? The tm_wday idea has worked everywhere for decades and is now standardized.
> 
> For what it's worth, GNU Emacs has been using the tm_wday idea since 2018, when I made the following change:

I am a bit curious about the original discussion, which I cannot see:

I have recently brushed up a C++ program I wrote that uses the Julian Day Number (JDN) and Julian date, which is used in astronomy. So, for example, 0 JDN, the Julian calendar date -4712-01-01, is a Monday, making it easy to find the day of the week, computing modulo 7. The Julian date 0.0 is noon UT on 0 JDN.

https://en.wikipedia.org/wiki/Julian_day
Alejandro Colomar Aug. 23, 2024, 8:27 p.m. UTC | #17
Hi Brian,

On Fri, Aug 23, 2024 at 12:31:01PM GMT, Brian Inglis wrote:
> On 2024-08-23 08:25, Alejandro Colomar wrote:
> > Hi Brian,
> > 
> > On Fri, Aug 23, 2024 at 08:04:20AM GMT, Brian Inglis wrote:
> > > AFAICS from 9899 1998 Draft to 2023, the *mktime* wording has been essentially:
> > 
> > I find the above confusing.  What is 9899 1998?  The draft is for
> > ISO/IEC 9899:2024.  There's no 1998 in the name.
> 
> Sorry for the typo - from the original *1988* Draft from ANSI X3J11 88-090
> 1988-05-13 of ANSI/ISO/IEC 9899:1990 through 2023.

Ahh, thanks!

> 
> > > "The original values of the tm_wday and tm_yday components of the structure
> > > are ignored, and the original values of the other components are not
> > > restricted to the ranges indicated above. On successful completion, the
> > > values of the tm_wday and tm_yday components of the structure are set
> > > appropriately,"
> > 
> > The text you've quoted is there since ISO C89.
> > <https://port70.net/~nsz/c/c89/c89-draft.html#4.12.2.3>
> > 
> > And that is not guarantee enough.  It says that wday and yday are
> > ignored (for the purposes of determining the return value).  And it says
> > that on success it modifies them.  But that text is silent about what
> > happens on error.
> 
> It states they are set on successful completion, and implied left alone if not.

ISO C leaves everything not explicitly stated as Undefined Behavior.  :)
Implied left alone is too optimistic.

> 
> > It is C23 in 7.19.2.3p3 which provides a novel guarantee, that those
>              ^^^^^^^^^^
>              7.29.2.3.p3

Oops typo.  :)

> 
> See "WD" https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3220.pdf#page=415
> 
> > fields won't be modified on a failed call.
> > 
> > > 
> > > so the recommendation has been to do something like (pointless example):
> > > 
> > > 	time_t tt = time(&tt);
> > > 	struct tm *tm = localtime(&tt);
> > > 	tm->tm_wday = tm->tm_yday = -1;
> > > 	if ((tt = mktime(tm)) == -1 && tm->tm_wday == -1 && tm->tm_yday == -1)
> > 
> > This conditional, just like the one in the POSIX 2024 standard, is
> > redundant.  Reading tm_wday is enough for determining a failure.
> > 
> > Otherwise, what would you expect if tm_yday is 4 but tm_wday is -1? Half an error? :)
> 
> Checking both are still untouched guarantees an error!
> If one is set, it's a QoI bug, not an error ;^>

Cheers,
Alex
Alejandro Colomar Aug. 23, 2024, 8:34 p.m. UTC | #18
Hi Paul,

On Fri, Aug 23, 2024 at 12:08:36PM GMT, Paul Eggert wrote:
> On 2024-08-23 10:48, Alejandro Colomar wrote:
> > Robert, Geoff, Andrew, can you please clarify where is the discussion
> > that led to the following change?:
> 
> Why does it matter? The tm_wday idea has worked everywhere for decades and
> is now standardized.
> 
> For what it's worth, GNU Emacs has been using the tm_wday idea since 2018,
> when I made the following change:
> 
> https://git.savannah.gnu.org/cgit/emacs.git/commit/?id=b4eb908f858284a7962851fd99c94598f76afa6f
> 
> and many GNU tools also use the idea, because I made a similar change to
> Gnulib too:
> 
> https://git.savannah.gnu.org/cgit/gnulib.git/commit/?id=6ccfbb4ce5d4fa79f7afb48f3648f2e0401523c3

Great!  That gives me more confidence.  Thanks a lot.  :)

Have a lovely night!
Alex
diff mbox series

Patch

diff --git a/man/man3/ctime.3 b/man/man3/ctime.3
index 5aec51b79..5ab881978 100644
--- a/man/man3/ctime.3
+++ b/man/man3/ctime.3
@@ -412,6 +412,163 @@  .SH NOTES
 object types may overwrite the information in any object of the same type
 pointed to by the value returned from any previous call to any of them."
 This can occur in the glibc implementation.
+.SH EXAMPLES
+Passing an invalid time to
+.BR mktime ()
+or an invalid
+.I tm->tm_isdst
+value
+yields unspecified results.
+Also,
+passing the value
+.I \-1
+in
+.I tm->tm_isdst
+will result in an ambiguous time during some DST transitions,
+which will also yield an unspecified result.
+.P
+The program below uses a wrapper that
+allows detecting invalid and ambiguous times,
+with
+.B EINVAL
+and
+.BR ENOTUNIQ ,
+respectively.
+.P
+The following shell session shows sample runs of the program:
+.P
+.in +4n
+.EX
+.RB $\~ "export TZ=Europe/Madrid" ;
+$
+.RB $\~ "./a.out 2024 08 23 00 17 53 \-1" ;
+1724365073
+.RB $\~ "./a.out 2024 08 23 00 17 53 0" ;
+a.out: mktime: Invalid argument
+1724368673
+.RB $\~ "./a.out 2024 08 23 00 17 53 1" ;
+1724365073
+$
+.RB $\~ "./a.out 2024 02 23 00 17 53 \-1" ;
+1708643873
+.RB $\~ "./a.out 2024 02 23 00 17 53 0" ;
+1708643873
+.RB $\~ "./a.out 2024 02 23 00 17 53 1" ;
+a.out: mktime: Invalid argument
+1708640273
+$
+.RB $\~ "./a.out 2024 03 26 02 17 53 \-1" ;
+a.out: mktime: Invalid argument
+1679793473
+$
+.RB $\~ "./a.out 2024 10 29 02 17 53 \-1" ;
+a.out: mktime: Name not unique on network
+1698542273
+.RB $\~ "./a.out 2024 10 29 02 17 53 0" ;
+1698542273
+.RB $\~ "./a.out 2024 10 29 02 17 53 1" ;
+1698538673
+$
+.RB $\~ "./a.out 2024 02 29 12 00 00 \-1" ;
+a.out: mktime: Invalid argument
+1677668400
+.EE
+.SS Program source: mktime.c
+\&
+.\" SRC BEGIN (mktime.c)
+.EX
+#include <err.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+\&
+time_t my_mktime(struct tm *tp);
+\&
+int
+main(int argc, char *argv[])
+{
+    char       **p;
+    time_t     t;
+    struct tm  tm;
+\&
+    if (argc != 8) {
+        fprintf(stderr, "Usage: %s yyyy mm dd HH MM SS isdst\[rs]n", argv[0]);
+        exit(EXIT_FAILURE);
+    }
+\&
+    p = &argv[1];
+    tm.tm_year  = atoi(*p++) \- 1900;
+    tm.tm_mon   = atoi(*p++) \- 1;
+    tm.tm_mday  = atoi(*p++);
+    tm.tm_hour  = atoi(*p++);
+    tm.tm_min   = atoi(*p++);
+    tm.tm_sec   = atoi(*p++);
+    tm.tm_isdst = atoi(*p++);
+\&
+    errno = 0;
+    t = my_mktime(&tm);
+    if (errno == EOVERFLOW)
+        err(EXIT_FAILURE, "mktime");
+    if (errno == EINVAL || errno == ENOTUNIQ)
+        warn("mktime");
+\&
+    printf("%ju\[rs]n", (uintmax_t) t);
+    exit(EXIT_SUCCESS);
+}
+\&
+time_t
+my_mktime(struct tm *tp)
+{
+    int        e, isdst;
+    time_t     t;
+    struct tm  tm;
+\&
+    e = errno;
+    errno = 0;
+\&
+    tm = *tp;
+    isdst = tp\->tm_isdst;
+\&
+    t = mktime(tp);
+    if (t == \-1 && errno == EOVERFLOW)
+        return \-1;
+\&
+    if (isdst == \-1)
+        tm.tm_isdst = tp\->tm_isdst;
+\&
+    if (   tm.tm_sec   != tp\->tm_sec
+        || tm.tm_min   != tp\->tm_min
+        || tm.tm_hour  != tp\->tm_hour
+        || tm.tm_mday  != tp\->tm_mday
+        || tm.tm_mon   != tp\->tm_mon
+        || tm.tm_year  != tp\->tm_year
+        || tm.tm_isdst != tp\->tm_isdst)
+    {
+        errno = EINVAL;
+        return t;
+    }
+\&
+    if (isdst != \-1)
+        goto out;
+\&
+    tm = *tp;
+    tm.tm_isdst = !tm.tm_isdst;
+\&
+    if (mktime(&tm) == \-1 && errno == EOVERFLOW)
+        goto out;
+\&
+    if (tm.tm_isdst != tp\->tm_isdst) {
+        errno = ENOTUNIQ;
+        return t;
+    }
+out:
+    errno = e;
+    return t;
+}
+.EE
+.\" SRC END
 .SH SEE ALSO
 .BR date (1),
 .BR gettimeofday (2),