diff mbox series

stdio-common: Fix scanf parsing for NaN types [BZ #30647]

Message ID 20241018112325.1515086-1-avinal.xlvii@gmail.com
State New
Headers show
Series stdio-common: Fix scanf parsing for NaN types [BZ #30647] | expand

Commit Message

Avinal Kumar Oct. 18, 2024, 11:20 a.m. UTC
The scanf family of functions, such as sscanf and fscanf, currently
does not recognize nan() and nan(n-char-sequence).  This happens
because __vfscanf_internal only checks for 'nan'.

This commit adds support for all valid NaN forms, i.e. nan, nan()
and nan(n-char-sequence), where n-char-sequence can be
[a-zA-Z0-9_]+, thus fixing bug 30647.  Any other representation
of NaN should result in a conversion error.

New tests are also added to verify the correct parsing of NaN types.

Signed-off-by: Avinal Kumar <avinal.xlvii@gmail.com>
---
Please refer to https://sourceware.org/bugzilla/show_bug.cgi?id=30647

 stdio-common/Makefile           |  1 +
 stdio-common/tst-scanf-nan.c    | 81 +++++++++++++++++++++++++++++++++
 stdio-common/vfscanf-internal.c | 49 +++++++++++++++++++-
 3 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 stdio-common/tst-scanf-nan.c
diff mbox series

Patch

diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 88105b3c1b..a166eb7cf8 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -261,6 +261,7 @@  tests := \
   tst-scanf-binary-gnu89 \
   tst-scanf-bz27650 \
   tst-scanf-intn \
+  tst-scanf-nan \
   tst-scanf-round \
   tst-scanf-to_inpunct \
   tst-setvbuf1 \
diff --git a/stdio-common/tst-scanf-nan.c b/stdio-common/tst-scanf-nan.c
new file mode 100644
index 0000000000..53658ecc9a
--- /dev/null
+++ b/stdio-common/tst-scanf-nan.c
@@ -0,0 +1,81 @@ 
+/* Test scanf formats for nan, nan(), nan(n-char-sequence) types.
+   Copyright The GNU Toolchain Authors.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <support/check.h>
+
+#define CHECK_SCANF_RET(OK, STR, FMT, ...)                                    \
+  do /* Run sscanf once and verify its return value equals OK.  */           \
+    {                                                                         \
+      int ret = sscanf (STR, FMT, __VA_ARGS__);                               \
+      TEST_VERIFY (ret == (OK));                                              \
+    }                                                                         \
+  while (0)
+
+/* Valid nan types:
+   1. nan
+   2. nan()
+   3. nan([a-zA-Z0-9_]+)
+   Any other nan format is invalid and should produce a conversion error.
+   Each check compares the sscanf return value, i.e. the number of successful
+   conversions; after a conversion error no later directive is processed.  */
+static int
+do_test (void)
+{
+  double a, b, c;
+  int d;
+
+  /* Valid inputs: every directive is expected to convert.  */
+  CHECK_SCANF_RET (1, "nan", "%lf", &a);
+  CHECK_SCANF_RET (1, "nan()", "%lf", &a);
+  CHECK_SCANF_RET (1, "nan(12345)", "%lf", &a);
+  CHECK_SCANF_RET (2, "nan12", "%lf%d", &a, &d);
+  CHECK_SCANF_RET (2, "nan nan()", "%lf%lf", &a, &b);
+  CHECK_SCANF_RET (2, "nan nan(12345foo)", "%lf%lf", &a, &b);
+  CHECK_SCANF_RET (3, "nan nan() 12.234", "%lf%lf%lf", &a, &b, &c);
+  CHECK_SCANF_RET (4, "nannan()nan(foo)1234", "%lf%lf%lf%d", &a, &b, &c, &d);
+
+  /* Only a leading part of the input converts; exactly 1 is expected.  */
+  CHECK_SCANF_RET (1, "nan( )", "%3lf", &a);
+  CHECK_SCANF_RET (1, "nan nan(", "%lf%lf", &a, &b);
+
+  /* Invalid inputs: no conversion is expected (return value 0).  */
+
+  /* Opening parenthesis without a matching ')'.  */
+  CHECK_SCANF_RET (0, "nan(", "%lf", &a);
+  CHECK_SCANF_RET (0, "nan(12345", "%lf", &a);
+  CHECK_SCANF_RET (0, "nan(12345", "%lf%d", &a, &d);
+
+  /* Field width 4 covers only "nan(", so the closing ')' is never read.  */
+  CHECK_SCANF_RET (0, "nan()", "%4lf", &a);
+
+  /* Space is not a valid n-char-sequence character.  */
+  CHECK_SCANF_RET (0, "nan( )", "%lf", &a);
+  CHECK_SCANF_RET (0, "nan( )12.34", "%lf%lf", &a, &b);
+  CHECK_SCANF_RET (0, "nan(12 foo)", "%lf", &a);
+
+  /* Period '.' is not a valid n-char-sequence character.  */
+  CHECK_SCANF_RET (0, "nan(12.34) nan(FooBar)", "%lf%lf", &a, &b);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/stdio-common/vfscanf-internal.c b/stdio-common/vfscanf-internal.c
index 1b82deffa7..e20048dded 100644
--- a/stdio-common/vfscanf-internal.c
+++ b/stdio-common/vfscanf-internal.c
@@ -2028,7 +2028,54 @@  digits_extended_fail:
 	      if (width > 0)
 		--width;
 	      char_buffer_add (&charbuf, c);
-	      /* It is "nan".  */
+	      /* It is at least "nan".  Now we check for nan() and
+	         nan(n-char-sequence).  */
+	      if (width != 0 && inchar () != EOF)
+		{
+		  if (c == L_ ('('))
+		    {
+		      if (width > 0)
+			--width;
+		      char_buffer_add (&charbuf, c);
+		      /* A '(' was observed, check for a closing ')', there
+			 may or may not be a n-char-sequence in between.  We
+			 have to check the longest prefix until there is a
+			 conversion error or closing parenthesis.  */
+		      do
+			{
+			  if (__builtin_expect (width == 0
+						|| inchar () == EOF, 0))
+			    {
+			      /* Conversion error because we ran out of
+				 characters.  */
+			      conv_error ();
+			      break;
+			    }
+			  if (!((c >= L_ ('0') && c <= L_ ('9'))
+				|| (c >= L_ ('A') && c <= L_ ('Z'))
+				|| (c >= L_ ('a') && c <= L_ ('z'))
+				|| c == L_ ('_') || c == L_ (')')))
+			    {
+			      /* Invalid character was observed.  Only valid
+				 characters are [a-zA-Z0-9_] and ')'.  */
+			      conv_error ();
+			      break;
+			    }
+			  if (width > 0)
+			    --width;
+			  char_buffer_add (&charbuf, c);
+
+			  /* The loop only exits successfully when ')' is the
+			     last character.  */
+			  if (c == L_ (')'))
+			    break;
+			}
+		      while (width != 0);
+		    }
+		  else
+		    /* It is only 'nan'.  */
+		    ungetc (c, s);
+		}
 	      goto scan_float;
 	    }
 	  else if (TOLOWER (c) == L_('i'))