diff mbox series

[pushed: r15-3553] SARIF output: fix schema URL [§3.13.3, PR116603]

Message ID 20240909234819.586863-1-dmalcolm@redhat.com
State New
Headers show
Series [pushed: r15-3553] SARIF output: fix schema URL [§3.13.3, PR116603] | expand

Commit Message

David Malcolm Sept. 9, 2024, 11:48 p.m. UTC
We were using
  https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json
as the URL for the SARIF 2.1 schema, but this is now a 404.

Update it to the URL listed in the spec (§3.13.3 "$schema property"),
which is:
  https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json
and update the copy in
  gcc/testsuite/lib/sarif-schema-2.1.0.json
used by the "verify-sarif-file" DejaGnu directive to the version found at
that latter URL; the sha256 sum changes
from: 2b19d2358baef0251d7d24e208d05ffabf1b2a3ab5e1b3a816066fc57fd4a7e8
  to: c3b4bb2d6093897483348925aaa73af03b3e3f4bd4ca38cef26dcb4212a2682e

Doing so added a validation error on
  c-c++-common/diagnostic-format-sarif-file-pr111700.c
for which we emit this textual output:
  this-file-does-not-exist.c: warning: #warning message [-Wcpp]
with no line number, and these invalid SARIF regions within the
physical location of the warning:
  "region": {"startColumn": 2,
             "endColumn": 9},
  "contextRegion": {}

This is due to this directive:
  # 0 "this-file-does-not-exist.c"
with line number 0.

The patch fixes this by not creating regions that have startLine <= 0.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r15-3553-g38dc2c64710aa0.

gcc/ChangeLog:
	PR other/116603
	* diagnostic-format-sarif.cc (SARIF_SCHEMA): Update URL.
	(sarif_builder::maybe_make_region_object): Don't create regions
	with startLine <= 0.
	(sarif_builder::maybe_make_region_object_for_context): Likewise.

gcc/testsuite/ChangeLog:
	PR other/116603
	* gcc.dg/plugin/diagnostic-test-metadata-sarif.py (test_basics):
	Update expected schema URL.
	* gcc.dg/plugin/diagnostic-test-paths-multithreaded-sarif.py:
	Likewise.
	* gcc.dg/sarif-output/test-include-chain-1.py: Likewise.
	* gcc.dg/sarif-output/test-include-chain-2.py: Likewise.
	* gcc.dg/sarif-output/test-missing-semicolon.py: Likewise.
	* gcc.dg/sarif-output/test-no-diagnostics.py: Likewise.
	* gcc.dg/sarif-output/test-werror.py: Likewise.
	* lib/sarif-schema-2.1.0.json: Update with copy downloaded from
	https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
---
 gcc/diagnostic-format-sarif.cc                | 25 +++++--
 .../plugin/diagnostic-test-metadata-sarif.py  |  2 +-
 ...agnostic-test-paths-multithreaded-sarif.py |  2 +-
 .../sarif-output/test-include-chain-1.py      |  2 +-
 .../sarif-output/test-include-chain-2.py      |  2 +-
 .../sarif-output/test-missing-semicolon.py    |  2 +-
 .../sarif-output/test-no-diagnostics.py       |  2 +-
 .../gcc.dg/sarif-output/test-werror.py        |  2 +-
 gcc/testsuite/lib/sarif-schema-2.1.0.json     | 73 ++++++++++++-------
 9 files changed, 72 insertions(+), 40 deletions(-)
diff mbox series

Patch

diff --git a/gcc/diagnostic-format-sarif.cc b/gcc/diagnostic-format-sarif.cc
index 9d9e7ae60734..e95f18f31bda 100644
--- a/gcc/diagnostic-format-sarif.cc
+++ b/gcc/diagnostic-format-sarif.cc
@@ -2221,7 +2221,10 @@  sarif_builder::get_sarif_column (expanded_location exploc) const
    or return nullptr.
 
    If COLUMN_OVERRIDE is non-zero, then use it as the column number
-   if LOC has no column information.  */
+   if LOC has no column information.
+
+   We only support text properties of regions ("text regions"),
+   not binary properties ("binary regions"); see 3.30.1.  */
 
 std::unique_ptr<sarif_region>
 sarif_builder::maybe_make_region_object (location_t loc,
@@ -2244,11 +2247,16 @@  sarif_builder::maybe_make_region_object (location_t loc,
   if (exploc_finish.file !=exploc_caret.file)
     return nullptr;
 
+  /* We can have line == 0 in the presence of "#" lines.
+     SARIF requires lines > 0, so if we hit this case we don't have a
+     way of validly representing the region as SARIF; bail out.  */
+  if (exploc_start.line <= 0)
+    return nullptr;
+
   auto region_obj = ::make_unique<sarif_region> ();
 
   /* "startLine" property (SARIF v2.1.0 section 3.30.5) */
-  if (exploc_start.line > 0)
-    region_obj->set_integer ("startLine", exploc_start.line);
+  region_obj->set_integer ("startLine", exploc_start.line);
 
   /* "startColumn" property (SARIF v2.1.0 section 3.30.6).
 
@@ -2316,11 +2324,16 @@  maybe_make_region_object_for_context (location_t loc,
   if (exploc_finish.file !=exploc_caret.file)
     return nullptr;
 
+  /* We can have line == 0 in the presence of "#" lines.
+     SARIF requires lines > 0, so if we hit this case we don't have a
+     way of validly representing the region as SARIF; bail out.  */
+  if (exploc_start.line <= 0)
+    return nullptr;
+
   auto region_obj = ::make_unique<sarif_region> ();
 
   /* "startLine" property (SARIF v2.1.0 section 3.30.5) */
-  if (exploc_start.line > 0)
-    region_obj->set_integer ("startLine", exploc_start.line);
+  region_obj->set_integer ("startLine", exploc_start.line);
 
   /* "endLine" property (SARIF v2.1.0 section 3.30.7) */
   if (exploc_finish.line != exploc_start.line
@@ -2627,7 +2640,7 @@  sarif_builder::make_multiformat_message_string (const char *msg) const
   return message_obj;
 }
 
-#define SARIF_SCHEMA "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"
+#define SARIF_SCHEMA "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json"
 #define SARIF_VERSION "2.1.0"
 
 /* Make a top-level "sarifLog" object (SARIF v2.1.0 section 3.13).  */
diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-metadata-sarif.py b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-metadata-sarif.py
index 959e6f2e9942..2c3858786eca 100644
--- a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-metadata-sarif.py
+++ b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-metadata-sarif.py
@@ -13,7 +13,7 @@  def sarif():
 
 def test_basics(sarif):
     schema = sarif['$schema']
-    assert schema == "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"
+    assert schema == "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json"
 
     version = sarif['version']
     assert version == "2.1.0"
diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-paths-multithreaded-sarif.py b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-paths-multithreaded-sarif.py
index cb00faf1532a..43d40cea372b 100644
--- a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-paths-multithreaded-sarif.py
+++ b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-paths-multithreaded-sarif.py
@@ -45,7 +45,7 @@  def sarif():
 
 def test_basics(sarif):
     schema = sarif['$schema']
-    assert schema == "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"
+    assert schema == "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json"
 
     version = sarif['version']
     assert version == "2.1.0"
diff --git a/gcc/testsuite/gcc.dg/sarif-output/test-include-chain-1.py b/gcc/testsuite/gcc.dg/sarif-output/test-include-chain-1.py
index 4bb2ebf61473..87e7627dcbd6 100644
--- a/gcc/testsuite/gcc.dg/sarif-output/test-include-chain-1.py
+++ b/gcc/testsuite/gcc.dg/sarif-output/test-include-chain-1.py
@@ -8,7 +8,7 @@  def sarif():
 
 def test_basics(sarif):
     schema = sarif['$schema']
-    assert schema == "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"
+    assert schema == "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json"
 
     version = sarif['version']
     assert version == "2.1.0"
diff --git a/gcc/testsuite/gcc.dg/sarif-output/test-include-chain-2.py b/gcc/testsuite/gcc.dg/sarif-output/test-include-chain-2.py
index 843f89a94853..3671e8d7d8d3 100644
--- a/gcc/testsuite/gcc.dg/sarif-output/test-include-chain-2.py
+++ b/gcc/testsuite/gcc.dg/sarif-output/test-include-chain-2.py
@@ -26,7 +26,7 @@  def sarif():
 
 def test_basics(sarif):
     schema = sarif['$schema']
-    assert schema == "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"
+    assert schema == "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json"
 
     version = sarif['version']
     assert version == "2.1.0"
diff --git a/gcc/testsuite/gcc.dg/sarif-output/test-missing-semicolon.py b/gcc/testsuite/gcc.dg/sarif-output/test-missing-semicolon.py
index 17759d35a468..58c0a7d02cd8 100644
--- a/gcc/testsuite/gcc.dg/sarif-output/test-missing-semicolon.py
+++ b/gcc/testsuite/gcc.dg/sarif-output/test-missing-semicolon.py
@@ -8,7 +8,7 @@  def sarif():
 
 def test_basics(sarif):
     schema = sarif['$schema']
-    assert schema == "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"
+    assert schema == "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json"
 
     version = sarif['version']
     assert version == "2.1.0"
diff --git a/gcc/testsuite/gcc.dg/sarif-output/test-no-diagnostics.py b/gcc/testsuite/gcc.dg/sarif-output/test-no-diagnostics.py
index f5812df17dc0..a3e052f100a0 100644
--- a/gcc/testsuite/gcc.dg/sarif-output/test-no-diagnostics.py
+++ b/gcc/testsuite/gcc.dg/sarif-output/test-no-diagnostics.py
@@ -8,7 +8,7 @@  def sarif():
 
 def test_basics(sarif):
     schema = sarif['$schema']
-    assert schema == "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"
+    assert schema == "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json"
 
     version = sarif['version']
     assert version == "2.1.0"
diff --git a/gcc/testsuite/gcc.dg/sarif-output/test-werror.py b/gcc/testsuite/gcc.dg/sarif-output/test-werror.py
index 99c2c2c97919..291a26b55880 100644
--- a/gcc/testsuite/gcc.dg/sarif-output/test-werror.py
+++ b/gcc/testsuite/gcc.dg/sarif-output/test-werror.py
@@ -8,7 +8,7 @@  def sarif():
 
 def test_basics(sarif):
     schema = sarif['$schema']
-    assert schema == "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"
+    assert schema == "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json"
 
     version = sarif['version']
     assert version == "2.1.0"
diff --git a/gcc/testsuite/lib/sarif-schema-2.1.0.json b/gcc/testsuite/lib/sarif-schema-2.1.0.json
index e0b652457104..0f58372b548f 100644
--- a/gcc/testsuite/lib/sarif-schema-2.1.0.json
+++ b/gcc/testsuite/lib/sarif-schema-2.1.0.json
@@ -1,7 +1,7 @@ 
 {
-  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$schema": "http://json-schema.org/draft-04/schema#",
   "title": "Static Analysis Results Format (SARIF) Version 2.1.0 JSON Schema",
-  "$id": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
+  "id": "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json",
   "description": "Static Analysis Results Format (SARIF) Version 2.1.0 JSON Schema: a standard format for the output of static analysis tools.",
   "additionalProperties": false,
   "type": "object",
@@ -15,12 +15,13 @@ 
 
     "version": {
       "description": "The SARIF format version of this log file.",
-      "enum": [ "2.1.0" ]
+      "enum": [ "2.1.0" ],
+      "type": "string"
     },
 
     "runs": {
       "description": "The set of runs contained in this log file.",
-      "type": "array",
+      "type": [ "array", "null" ],
       "minItems": 0,
       "uniqueItems": false,
       "items": {
@@ -181,7 +182,8 @@ 
               "userSpecifiedConfiguration",
               "toolSpecifiedConfiguration",
               "debugOutputFile"
-            ]
+            ],
+            "type": "string"
           }
         },
 
@@ -587,17 +589,18 @@ 
 
         "version": {
           "description": "The SARIF format version of this external properties object.",
-          "enum": [ "2.1.0" ]
+          "enum": [ "2.1.0" ],
+          "type": "string"
         },
 
         "guid": {
-          "description": "A stable, unique identifer for this external properties object, in the form of a GUID.",
+          "description": "A stable, unique identifier for this external properties object, in the form of a GUID.",
           "type": "string",
           "pattern": "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$"
         },
 
         "runGuid": {
-          "description": "A stable, unique identifer for the run associated with this external properties object, in the form of a GUID.",
+          "description": "A stable, unique identifier for the run associated with this external properties object, in the form of a GUID.",
           "type": "string",
           "pattern": "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$"
         },
@@ -778,7 +781,7 @@ 
         },
 
         "guid": {
-          "description": "A stable, unique identifer for the external property file in the form of a GUID.",
+          "description": "A stable, unique identifier for the external property file in the form of a GUID.",
           "type": "string",
           "pattern": "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$"
         },
@@ -1563,7 +1566,8 @@ 
         "level": {
           "description": "A value specifying the severity level of the notification.",
           "default": "warning",
-          "enum": [ "none", "note", "warning", "error" ]
+          "enum": [ "none", "note", "warning", "error" ],
+          "type": "string"
         },
 
         "threadId": {
@@ -1775,7 +1779,13 @@ 
           "description": "Key/value pairs that provide additional information about the region.",
           "$ref": "#/definitions/propertyBag"
         }
-      }
+      },
+
+      "anyOf": [
+        { "required": [ "startLine" ] },
+        { "required": [ "charOffset" ] },
+        { "required": [ "byteOffset" ] }
+      ]
     },
 
     "replacement": {
@@ -1825,7 +1835,7 @@ 
         },
 
         "guid": {
-          "description": "A unique identifer for the reporting descriptor in the form of a GUID.",
+          "description": "A unique identifier for the reporting descriptor in the form of a GUID.",
           "type": "string",
           "pattern": "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$"
         },
@@ -1924,7 +1934,8 @@ 
         "level": {
           "description": "Specifies the failure level for the report.",
           "default": "warning",
-          "enum": [ "none", "note", "warning", "error" ]
+          "enum": [ "none", "note", "warning", "error" ],
+          "type": "string"
         },
 
         "rank": {
@@ -2048,13 +2059,15 @@ 
         "kind": {
           "description": "A value that categorizes results by evaluation state.",
           "default": "fail",
-          "enum": [ "notApplicable", "pass", "fail", "review", "open", "informational" ]
+          "enum": [ "notApplicable", "pass", "fail", "review", "open", "informational" ],
+          "type": "string"
         },
 
         "level": {
           "description": "A value specifying the severity level of the result.",
           "default": "warning",
-          "enum": [ "none", "note", "warning", "error" ]
+          "enum": [ "none", "note", "warning", "error" ],
+          "type": "string"
         },
 
         "message": {
@@ -2079,7 +2092,7 @@ 
         },
 
         "guid": {
-          "description": "A stable, unique identifer for the result in the form of a GUID.",
+          "description": "A stable, unique identifier for the result in the form of a GUID.",
           "type": "string",
           "pattern": "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$"
         },
@@ -2184,7 +2197,8 @@ 
             "unchanged",
             "updated",
             "absent"
-          ]
+          ],
+          "type": "string"
         },
 
         "rank": {
@@ -2354,7 +2368,7 @@ 
           "description": "The language of the messages emitted into the log file during this run (expressed as an ISO 639-1 two-letter lowercase culture code) and an optional region (expressed as an ISO 3166-1 two-letter uppercase subculture code associated with a country or region). The casing is recommended but not required (in order for this data to conform to RFC5646).",
           "type": "string",
           "default": "en-US",
-          "pattern": "^[a-zA-Z]{2}|^[a-zA-Z]{2}-[a-zA-Z]{2}]?$"
+          "pattern": "^[a-zA-Z]{2}(-[a-zA-Z]{2})?$"
         },
 
         "versionControlProvenance": {
@@ -2474,7 +2488,8 @@ 
 
         "columnKind": {
           "description": "Specifies the unit in which the tool measures columns.",
-          "enum": [ "utf16CodeUnits", "unicodeCodePoints" ]
+          "enum": [ "utf16CodeUnits", "unicodeCodePoints" ],
+          "type": "string"
         },
 
         "externalPropertyFileReferences": {
@@ -2590,7 +2605,7 @@ 
         },
 
         "guid": {
-          "description": "A stable, unique identifer for this object's containing run object in the form of a GUID.",
+          "description": "A stable, unique identifier for this object's containing run object in the form of a GUID.",
           "type": "string",
           "pattern": "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$"
         },
@@ -2702,7 +2717,7 @@ 
       "properties": {
 
         "guid": {
-          "description": "A stable, unique identifer for the suprression in the form of a GUID.",
+          "description": "A stable, unique identifier for the suprression in the form of a GUID.",
           "type": "string",
           "pattern": "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$"
         },
@@ -2712,7 +2727,8 @@ 
           "enum": [
             "inSource",
             "external"
-          ]
+          ],
+          "type": "string"
         },
 
         "status": {
@@ -2721,7 +2737,8 @@ 
             "accepted",
             "underReview",
             "rejected"
-          ]
+          ],
+          "type": "string"
         },
 
         "justification": {
@@ -2874,7 +2891,8 @@ 
         "importance": {
           "description": "Specifies the importance of this location in understanding the code flow in which it occurs. The order from most to least important is \"essential\", \"important\", \"unimportant\". Default: \"important\".",
           "enum": [ "important", "essential", "unimportant" ],
-          "default": "important"
+          "default": "important",
+          "type": "string"
         },
 
         "webRequest": {
@@ -2932,7 +2950,7 @@ 
       "properties": {
 
         "guid": {
-          "description": "A unique identifer for the tool component in the form of a GUID.",
+          "description": "A unique identifier for the tool component in the form of a GUID.",
           "type": "string",
           "pattern": "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$"
         },
@@ -3060,7 +3078,7 @@ 
           "description": "The language of the messages emitted into the log file during this run (expressed as an ISO 639-1 two-letter lowercase language code) and an optional region (expressed as an ISO 3166-1 two-letter uppercase subculture code associated with a country or region). The casing is recommended but not required (in order for this data to conform to RFC5646).",
           "type": "string",
           "default": "en-US",
-          "pattern": "^[a-zA-Z]{2}|^[a-zA-Z]{2}-[a-zA-Z]{2}]?$"
+          "pattern": "^[a-zA-Z]{2}(-[a-zA-Z]{2})?$"
         },
 
         "contents": {
@@ -3072,7 +3090,8 @@ 
             "enum": [
               "localizedData",
               "nonLocalizedData"
-            ]
+            ],
+            "type": "string"
           }
         },