diff mbox series

[pushed,1/2] diagnostics: fixes to SARIF output [PR109360]

Message ID 20240621125536.1134456-1-dmalcolm@redhat.com
State New
Headers show
Series [pushed,1/2] diagnostics: fixes to SARIF output [PR109360] | expand

Commit Message

David Malcolm June 21, 2024, 12:55 p.m. UTC
When adding validation of .sarif files against the schema
(PR testsuite/109360) I discovered various issues where we were
generating invalid .sarif files.

Specifically, in
  c-c++-common/diagnostic-format-sarif-file-bad-utf8-pr109098-1.c
the relatedLocations for the "note" diagnostics were missing column
numbers, leading to validation failure due to non-unique elements,
such as multiple:
	"message": {"text": "invalid UTF-8 character <bf>"}},
on line 25 with no column information.

Root cause is that for some diagnostics in libcpp we have a location_t
representing the line as a whole, setting a column_override on the
rich_location (since the line hasn't been fully read yet).  We were
handling this column override for plain text output, but not for .sarif
output.

Similarly, in diagnostic-format-sarif-file-pr111700.c there is a warning
emitted on "line 0" of the file, whereas SARIF requires line numbers to
be positive.

We also use column == 0 internally to mean "the line as a whole",
whereas SARIF required column numbers to be positive.

This patch fixes these various issues.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Successful run of analyzer integration tests on x86_64-pc-linux-gnu.
Pushed to trunk as r15-1540-g9f4fdc3acebcf6.

gcc/ChangeLog:
	PR testsuite/109360
	* diagnostic-format-sarif.cc
	(sarif_builder::make_location_object): Pass any column override
	from rich_loc to maybe_make_physical_location_object.
	(sarif_builder::maybe_make_physical_location_object): Add
	"column_override" param and pass it to maybe_make_region_object.
	(sarif_builder::maybe_make_region_object): Add "column_override"
	param and use it when the location has 0 for a column.  Don't
	add "startLine", "startColumn", "endLine", or "endColumn" if
	the values aren't positive.
	(sarif_builder::maybe_make_region_object_for_context): Don't
	add "startLine" or "endLine" if the values aren't positive.

libcpp/ChangeLog:
	PR testsuite/109360
	* include/rich-location.h (rich_location::get_column_override):
	New accessor.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
---
 gcc/diagnostic-format-sarif.cc | 75 ++++++++++++++++++++++++----------
 libcpp/include/rich-location.h |  2 +
 2 files changed, 56 insertions(+), 21 deletions(-)
diff mbox series

Patch

diff --git a/gcc/diagnostic-format-sarif.cc b/gcc/diagnostic-format-sarif.cc
index 79116f051bc1..acf2aa875c48 100644
--- a/gcc/diagnostic-format-sarif.cc
+++ b/gcc/diagnostic-format-sarif.cc
@@ -243,11 +243,13 @@  private:
   json::array *maybe_make_kinds_array (diagnostic_event::meaning m) const;
   json::object *
   maybe_make_physical_location_object (location_t loc,
-				       enum diagnostic_artifact_role role);
+				       enum diagnostic_artifact_role role,
+				       int column_override);
   json::object *make_artifact_location_object (location_t loc);
   json::object *make_artifact_location_object (const char *filename);
   json::object *make_artifact_location_object_for_pwd () const;
-  json::object *maybe_make_region_object (location_t loc) const;
+  json::object *maybe_make_region_object (location_t loc,
+					  int column_override) const;
   json::object *maybe_make_region_object_for_context (location_t loc) const;
   json::object *make_region_object_for_hint (const fixit_hint &hint) const;
   json::object *make_multiformat_message_string (const char *msg) const;
@@ -924,8 +926,9 @@  sarif_builder::make_location_object (const rich_location &rich_loc,
   location_t loc = rich_loc.get_loc ();
 
   /* "physicalLocation" property (SARIF v2.1.0 section 3.28.3).  */
-  if (json::object *phs_loc_obj = maybe_make_physical_location_object (loc,
-								       role))
+  if (json::object *phs_loc_obj
+	= maybe_make_physical_location_object (loc, role,
+					       rich_loc.get_column_override ()))
     location_obj->set ("physicalLocation", phs_loc_obj);
 
   /* "logicalLocations" property (SARIF v2.1.0 section 3.28.4).  */
@@ -946,7 +949,7 @@  sarif_builder::make_location_object (const diagnostic_event &event,
   /* "physicalLocation" property (SARIF v2.1.0 section 3.28.3).  */
   location_t loc = event.get_location ();
   if (json::object *phs_loc_obj
-	= maybe_make_physical_location_object (loc, role))
+	= maybe_make_physical_location_object (loc, role, 0))
     location_obj->set ("physicalLocation", phs_loc_obj);
 
   /* "logicalLocations" property (SARIF v2.1.0 section 3.28.4).  */
@@ -961,7 +964,10 @@  sarif_builder::make_location_object (const diagnostic_event &event,
   return location_obj;
 }
 
-/* Make a physicalLocation object (SARIF v2.1.0 section 3.29) for LOC,
+/* Make a physicalLocation object (SARIF v2.1.0 section 3.29) for LOC.
+
+   If COLUMN_OVERRIDE is non-zero, then use it as the column number
+   if LOC has no column information.
 
    Ensure that we have an artifact object for the file, adding ROLE to it,
    and flagging that we will attempt to embed the contents of the artifact
@@ -970,7 +976,8 @@  sarif_builder::make_location_object (const diagnostic_event &event,
 json::object *
 sarif_builder::
 maybe_make_physical_location_object (location_t loc,
-				     enum diagnostic_artifact_role role)
+				     enum diagnostic_artifact_role role,
+				     int column_override)
 {
   if (loc <= BUILTINS_LOCATION || LOCATION_FILE (loc) == NULL)
     return NULL;
@@ -983,7 +990,8 @@  maybe_make_physical_location_object (location_t loc,
   get_or_create_artifact (LOCATION_FILE (loc), role, true);
 
   /* "region" property (SARIF v2.1.0 section 3.29.4).  */
-  if (json::object *region_obj = maybe_make_region_object (loc))
+  if (json::object *region_obj = maybe_make_region_object (loc,
+							   column_override))
     phys_loc_obj->set ("region", region_obj);
 
   /* "contextRegion" property (SARIF v2.1.0 section 3.29.5).  */
@@ -1089,10 +1097,14 @@  sarif_builder::get_sarif_column (expanded_location exploc) const
 }
 
 /* Make a region object (SARIF v2.1.0 section 3.30) for LOC,
-   or return NULL.  */
+   or return NULL.
+
+   If COLUMN_OVERRIDE is non-zero, then use it as the column number
+   if LOC has no column information.  */
 
 json::object *
-sarif_builder::maybe_make_region_object (location_t loc) const
+sarif_builder::maybe_make_region_object (location_t loc,
+					 int column_override) const
 {
   location_t caret_loc = get_pure_location (loc);
 
@@ -1114,21 +1126,40 @@  sarif_builder::maybe_make_region_object (location_t loc) const
   json::object *region_obj = new json::object ();
 
   /* "startLine" property (SARIF v2.1.0 section 3.30.5) */
-  region_obj->set_integer ("startLine", exploc_start.line);
+  if (exploc_start.line > 0)
+    region_obj->set_integer ("startLine", exploc_start.line);
 
-  /* "startColumn" property (SARIF v2.1.0 section 3.30.6) */
-  region_obj->set_integer ("startColumn", get_sarif_column (exploc_start));
+  /* "startColumn" property (SARIF v2.1.0 section 3.30.6).
+
+     We use column == 0 to mean the whole line, so omit the column
+     information for this case, unless COLUMN_OVERRIDE is non-zero,
+     (for handling certain awkward lexer diagnostics)  */
+
+  if (exploc_start.column == 0 && column_override)
+    /* Use the provided column number.  */
+    exploc_start.column = column_override;
+
+  if (exploc_start.column > 0)
+    {
+      int start_column = get_sarif_column (exploc_start);
+      region_obj->set_integer ("startColumn", start_column);
+    }
 
   /* "endLine" property (SARIF v2.1.0 section 3.30.7) */
-  if (exploc_finish.line != exploc_start.line)
+  if (exploc_finish.line != exploc_start.line
+      && exploc_finish.line > 0)
     region_obj->set_integer ("endLine", exploc_finish.line);
 
   /* "endColumn" property (SARIF v2.1.0 section 3.30.8).
-     This expresses the column immediately beyond the range.  */
-  {
-    int next_column = sarif_builder::get_sarif_column (exploc_finish) + 1;
-    region_obj->set_integer ("endColumn", next_column);
-  }
+     This expresses the column immediately beyond the range.
+
+     We use column == 0 to mean the whole line, so omit the column
+     information for this case.  */
+  if (exploc_finish.column > 0)
+    {
+      int next_column = get_sarif_column (exploc_finish) + 1;
+      region_obj->set_integer ("endColumn", next_column);
+    }
 
   return region_obj;
 }
@@ -1164,10 +1195,12 @@  sarif_builder::maybe_make_region_object_for_context (location_t loc) const
   json::object *region_obj = new json::object ();
 
   /* "startLine" property (SARIF v2.1.0 section 3.30.5) */
-  region_obj->set_integer ("startLine", exploc_start.line);
+  if (exploc_start.line > 0)
+    region_obj->set_integer ("startLine", exploc_start.line);
 
   /* "endLine" property (SARIF v2.1.0 section 3.30.7) */
-  if (exploc_finish.line != exploc_start.line)
+  if (exploc_finish.line != exploc_start.line
+      && exploc_finish.line > 0)
     region_obj->set_integer ("endLine", exploc_finish.line);
 
   /* "snippet" property (SARIF v2.1.0 section 3.30.13).  */
diff --git a/libcpp/include/rich-location.h b/libcpp/include/rich-location.h
index be424cb4b65f..cc4f888f458b 100644
--- a/libcpp/include/rich-location.h
+++ b/libcpp/include/rich-location.h
@@ -514,6 +514,8 @@  class rich_location
 
   const line_maps *get_line_table () const { return m_line_table; }
 
+  int get_column_override () const { return m_column_override; }
+
 private:
   bool reject_impossible_fixit (location_t where);
   void stop_supporting_fixits ();