diff mbox series

[v5,52/65] i386/tdx: Wire TDX_REPORT_FATAL_ERROR with GuestPanic facility

Message ID 20240229063726.610065-53-xiaoyao.li@intel.com
State New
Headers show
Series QEMU Guest memfd + QEMU TDX support | expand

Commit Message

Xiaoyao Li Feb. 29, 2024, 6:37 a.m. UTC
Integrate TDX's TDX_REPORT_FATAL_ERROR into QEMU GuestPanic facility

Originated-from: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v5:
- mention additional error information in gpa when it presents;
- refine the documentation; (Markus)

Changes in v4:
- refine the documentation; (Markus)

Changes in v3:
- Add docmentation of new type and struct; (Daniel)
- refine the error message handling; (Daniel)
---
 qapi/run-state.json   | 31 +++++++++++++++++++++--
 system/runstate.c     | 58 +++++++++++++++++++++++++++++++++++++++++++
 target/i386/kvm/tdx.c | 24 +++++++++++++++++-
 3 files changed, 110 insertions(+), 3 deletions(-)

Comments

Markus Armbruster Feb. 29, 2024, 8:51 a.m. UTC | #1
Xiaoyao Li <xiaoyao.li@intel.com> writes:

> Integrate TDX's TDX_REPORT_FATAL_ERROR into QEMU GuestPanic facility
>
> Originated-from: Isaku Yamahata <isaku.yamahata@intel.com>
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
> Changes in v5:
> - mention additional error information in gpa when it presents;
> - refine the documentation; (Markus)
>
> Changes in v4:
> - refine the documentation; (Markus)
>
> Changes in v3:
> - Add docmentation of new type and struct; (Daniel)
> - refine the error message handling; (Daniel)
> ---
>  qapi/run-state.json   | 31 +++++++++++++++++++++--
>  system/runstate.c     | 58 +++++++++++++++++++++++++++++++++++++++++++
>  target/i386/kvm/tdx.c | 24 +++++++++++++++++-
>  3 files changed, 110 insertions(+), 3 deletions(-)
>
> diff --git a/qapi/run-state.json b/qapi/run-state.json
> index dd0770b379e5..b71dd1884eb6 100644
> --- a/qapi/run-state.json
> +++ b/qapi/run-state.json
> @@ -483,10 +483,12 @@
>  #
>  # @s390: s390 guest panic information type (Since: 2.12)
>  #
> +# @tdx: tdx guest panic information type (Since: 9.0)
> +#
>  # Since: 2.9
>  ##
>  { 'enum': 'GuestPanicInformationType',
> -  'data': [ 'hyper-v', 's390' ] }
> +  'data': [ 'hyper-v', 's390', 'tdx' ] }
>  
>  ##
>  # @GuestPanicInformation:
> @@ -501,7 +503,8 @@
>   'base': {'type': 'GuestPanicInformationType'},
>   'discriminator': 'type',
>   'data': {'hyper-v': 'GuestPanicInformationHyperV',
> -          's390': 'GuestPanicInformationS390'}}
> +          's390': 'GuestPanicInformationS390',
> +          'tdx' : 'GuestPanicInformationTdx'}}
>  
>  ##
>  # @GuestPanicInformationHyperV:
> @@ -564,6 +567,30 @@
>            'psw-addr': 'uint64',
>            'reason': 'S390CrashReason'}}
>  
> +##
> +# @GuestPanicInformationTdx:
> +#
> +# TDX Guest panic information specific to TDX, as specified in the
> +# "Guest-Hypervisor Communication Interface (GHCI) Specification",
> +# section TDG.VP.VMCALL<ReportFatalError>.
> +#
> +# @error-code: TD-specific error code
> +#
> +# @message: Human-readable error message provided by the guest. Not
> +#     to be trusted.
> +#
> +# @gpa: guest-physical address of a page that contains more verbose
> +#     error information, as zero-terminated string.  Present when the
> +#     "GPA valid" bit (bit 63) is set in @error-code.

Uh, peeking at GHCI Spec section 3.4 TDG.VP.VMCALL<ReportFatalError>, I
see operand R12 consists of

    bits    name                        description
    31:0    TD-specific error code      TD-specific error code
                                        Panic – 0x0.
                                        Values – 0x1 to 0xFFFFFFFF
                                        reserved.
    62:32   TD-specific extended        TD-specific extended error code.
            error code                  TD software defined.
    63      GPA Valid                   Set if the TD specified additional
                                        information in the GPA parameter
                                        (R13).

Is @error-code all of R12, or just bits 31:0?

If it's all of R12, description of @error-code as "TD-specific error
code" is misleading.

If it's just bits 31:0, then 'Present when the "GPA valid" bit (bit 63)
is set in @error-code' is wrong.  Could go with 'Only present when the
guest provides this information'.

> +#
> +#

Drop one of these two lines, please.

> +# Since: 9.0
> +##
> +{'struct': 'GuestPanicInformationTdx',
> + 'data': {'error-code': 'uint64',
> +          'message': 'str',
> +          '*gpa': 'uint64'}}
> +
>  ##
>  # @MEMORY_FAILURE:
>  #
Xiaoyao Li March 7, 2024, 11:30 a.m. UTC | #2
On 2/29/2024 4:51 PM, Markus Armbruster wrote:
> Xiaoyao Li <xiaoyao.li@intel.com> writes:
> 
>> Integrate TDX's TDX_REPORT_FATAL_ERROR into QEMU GuestPanic facility
>>
>> Originated-from: Isaku Yamahata <isaku.yamahata@intel.com>
>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>> ---
>> Changes in v5:
>> - mention additional error information in gpa when it presents;
>> - refine the documentation; (Markus)
>>
>> Changes in v4:
>> - refine the documentation; (Markus)
>>
>> Changes in v3:
>> - Add docmentation of new type and struct; (Daniel)
>> - refine the error message handling; (Daniel)
>> ---
>>   qapi/run-state.json   | 31 +++++++++++++++++++++--
>>   system/runstate.c     | 58 +++++++++++++++++++++++++++++++++++++++++++
>>   target/i386/kvm/tdx.c | 24 +++++++++++++++++-
>>   3 files changed, 110 insertions(+), 3 deletions(-)
>>
>> diff --git a/qapi/run-state.json b/qapi/run-state.json
>> index dd0770b379e5..b71dd1884eb6 100644
>> --- a/qapi/run-state.json
>> +++ b/qapi/run-state.json
>> @@ -483,10 +483,12 @@
>>   #
>>   # @s390: s390 guest panic information type (Since: 2.12)
>>   #
>> +# @tdx: tdx guest panic information type (Since: 9.0)
>> +#
>>   # Since: 2.9
>>   ##
>>   { 'enum': 'GuestPanicInformationType',
>> -  'data': [ 'hyper-v', 's390' ] }
>> +  'data': [ 'hyper-v', 's390', 'tdx' ] }
>>   
>>   ##
>>   # @GuestPanicInformation:
>> @@ -501,7 +503,8 @@
>>    'base': {'type': 'GuestPanicInformationType'},
>>    'discriminator': 'type',
>>    'data': {'hyper-v': 'GuestPanicInformationHyperV',
>> -          's390': 'GuestPanicInformationS390'}}
>> +          's390': 'GuestPanicInformationS390',
>> +          'tdx' : 'GuestPanicInformationTdx'}}
>>   
>>   ##
>>   # @GuestPanicInformationHyperV:
>> @@ -564,6 +567,30 @@
>>             'psw-addr': 'uint64',
>>             'reason': 'S390CrashReason'}}
>>   
>> +##
>> +# @GuestPanicInformationTdx:
>> +#
>> +# TDX Guest panic information specific to TDX, as specified in the
>> +# "Guest-Hypervisor Communication Interface (GHCI) Specification",
>> +# section TDG.VP.VMCALL<ReportFatalError>.
>> +#
>> +# @error-code: TD-specific error code
>> +#
>> +# @message: Human-readable error message provided by the guest. Not
>> +#     to be trusted.
>> +#
>> +# @gpa: guest-physical address of a page that contains more verbose
>> +#     error information, as zero-terminated string.  Present when the
>> +#     "GPA valid" bit (bit 63) is set in @error-code.
> 
> Uh, peeking at GHCI Spec section 3.4 TDG.VP.VMCALL<ReportFatalError>, I
> see operand R12 consists of
> 
>      bits    name                        description
>      31:0    TD-specific error code      TD-specific error code
>                                          Panic – 0x0.
>                                          Values – 0x1 to 0xFFFFFFFF
>                                          reserved.
>      62:32   TD-specific extended        TD-specific extended error code.
>              error code                  TD software defined.
>      63      GPA Valid                   Set if the TD specified additional
>                                          information in the GPA parameter
>                                          (R13).
> 
> Is @error-code all of R12, or just bits 31:0?
> 
> If it's all of R12, description of @error-code as "TD-specific error
> code" is misleading.

We pass all of R12 to @error_code.

Here it wants to use "error_code" as generic as the whole R12. Do you 
have any better description of it ?

> If it's just bits 31:0, then 'Present when the "GPA valid" bit (bit 63)
> is set in @error-code' is wrong.  Could go with 'Only present when the
> guest provides this information'.
> 
>> +#
>> +#
> 
> Drop one of these two lines, please.
> 
>> +# Since: 9.0
>> +##
>> +{'struct': 'GuestPanicInformationTdx',
>> + 'data': {'error-code': 'uint64',
>> +          'message': 'str',
>> +          '*gpa': 'uint64'}}
>> +
>>   ##
>>   # @MEMORY_FAILURE:
>>   #
>
Markus Armbruster March 7, 2024, 1:51 p.m. UTC | #3
Xiaoyao Li <xiaoyao.li@intel.com> writes:

> On 2/29/2024 4:51 PM, Markus Armbruster wrote:
>> Xiaoyao Li <xiaoyao.li@intel.com> writes:
>> 
>>> Integrate TDX's TDX_REPORT_FATAL_ERROR into QEMU GuestPanic facility
>>>
>>> Originated-from: Isaku Yamahata <isaku.yamahata@intel.com>
>>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>> ---
>>> Changes in v5:
>>> - mention additional error information in gpa when it presents;
>>> - refine the documentation; (Markus)
>>>
>>> Changes in v4:
>>> - refine the documentation; (Markus)
>>>
>>> Changes in v3:
>>> - Add docmentation of new type and struct; (Daniel)
>>> - refine the error message handling; (Daniel)
>>> ---
>>>   qapi/run-state.json   | 31 +++++++++++++++++++++--
>>>   system/runstate.c     | 58 +++++++++++++++++++++++++++++++++++++++++++
>>>   target/i386/kvm/tdx.c | 24 +++++++++++++++++-
>>>   3 files changed, 110 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/qapi/run-state.json b/qapi/run-state.json
>>> index dd0770b379e5..b71dd1884eb6 100644
>>> --- a/qapi/run-state.json
>>> +++ b/qapi/run-state.json
>>> @@ -483,10 +483,12 @@
>>>  #
>>>  # @s390: s390 guest panic information type (Since: 2.12)
>>>  #
>>> +# @tdx: tdx guest panic information type (Since: 9.0)
>>> +#
>>>  # Since: 2.9
>>>  ##
>>>  { 'enum': 'GuestPanicInformationType',
>>> -  'data': [ 'hyper-v', 's390' ] }
>>> +  'data': [ 'hyper-v', 's390', 'tdx' ] }
>>>     ##
>>> # @GuestPanicInformation:
>>> @@ -501,7 +503,8 @@
>>>    'base': {'type': 'GuestPanicInformationType'},
>>>    'discriminator': 'type',
>>>    'data': {'hyper-v': 'GuestPanicInformationHyperV',
>>> -          's390': 'GuestPanicInformationS390'}}
>>> +          's390': 'GuestPanicInformationS390',
>>> +          'tdx' : 'GuestPanicInformationTdx'}}
>>>  ##
>>>  # @GuestPanicInformationHyperV:
>>> @@ -564,6 +567,30 @@
>>>             'psw-addr': 'uint64',
>>>             'reason': 'S390CrashReason'}}
>>> +##
>>> +# @GuestPanicInformationTdx:
>>> +#
>>> +# TDX Guest panic information specific to TDX, as specified in the
>>> +# "Guest-Hypervisor Communication Interface (GHCI) Specification",
>>> +# section TDG.VP.VMCALL<ReportFatalError>.
>>> +#
>>> +# @error-code: TD-specific error code
>>> +#
>>> +# @message: Human-readable error message provided by the guest. Not
>>> +#     to be trusted.
>>> +#
>>> +# @gpa: guest-physical address of a page that contains more verbose
>>> +#     error information, as zero-terminated string.  Present when the
>>> +#     "GPA valid" bit (bit 63) is set in @error-code.
>>
>> Uh, peeking at GHCI Spec section 3.4 TDG.VP.VMCALL<ReportFatalError>, I
>> see operand R12 consists of
>>
>>      bits    name                        description
>>      31:0    TD-specific error code      TD-specific error code
>>                                          Panic – 0x0.
>>                                          Values – 0x1 to 0xFFFFFFFF
>>                                          reserved.
>>      62:32   TD-specific extended        TD-specific extended error code.
>>              error code                  TD software defined.
>>      63      GPA Valid                   Set if the TD specified additional
>>                                          information in the GPA parameter
>>                                          (R13).
>> Is @error-code all of R12, or just bits 31:0?
>> If it's all of R12, description of @error-code as "TD-specific error
>> code" is misleading.
>
> We pass all of R12 to @error_code.
>
> Here it wants to use "error_code" as generic as the whole R12. Do you have any better description of it ?

Sadly, the spec is of no help: it doesn't name the entire thing, only
the three sub-fields TD-specific error code, TD-specific extended error
code, GPA valid.

We could take the hint, and provide the sub-fields instead:

* @error-code contains the TD-specific error code (bits 31:0)

* @extended-error-code contains the TD-specific extended error code
  (bits 62:32)

* we don't need @gpa-valid, because it's the same as "@gpa is present"

If we decide to keep the single member, we do need another name for it.
@error-codes (plural) doesn't exactly feel wonderful, but it gives at
least a subtle hint that it's not just *the* error code.

>> If it's just bits 31:0, then 'Present when the "GPA valid" bit (bit 63)
>> is set in @error-code' is wrong.  Could go with 'Only present when the
>> guest provides this information'.
>> 
>>> +#
>>> +#
>>
>> Drop one of these two lines, please.
>> 
>>> +# Since: 9.0
>>> +##
>>> +{'struct': 'GuestPanicInformationTdx',
>>> + 'data': {'error-code': 'uint64',
>>> +          'message': 'str',
>>> +          '*gpa': 'uint64'}}
>>> +
>>>   ##
>>>   # @MEMORY_FAILURE:
>>>   #
>>
Xiaoyao Li March 11, 2024, 1:28 a.m. UTC | #4
On 3/7/2024 9:51 PM, Markus Armbruster wrote:
> Xiaoyao Li <xiaoyao.li@intel.com> writes:
> 
>> On 2/29/2024 4:51 PM, Markus Armbruster wrote:
>>> Xiaoyao Li <xiaoyao.li@intel.com> writes:
>>>
>>>> Integrate TDX's TDX_REPORT_FATAL_ERROR into QEMU GuestPanic facility
>>>>
>>>> Originated-from: Isaku Yamahata <isaku.yamahata@intel.com>
>>>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>>> ---
>>>> Changes in v5:
>>>> - mention additional error information in gpa when it presents;
>>>> - refine the documentation; (Markus)
>>>>
>>>> Changes in v4:
>>>> - refine the documentation; (Markus)
>>>>
>>>> Changes in v3:
>>>> - Add docmentation of new type and struct; (Daniel)
>>>> - refine the error message handling; (Daniel)
>>>> ---
>>>>    qapi/run-state.json   | 31 +++++++++++++++++++++--
>>>>    system/runstate.c     | 58 +++++++++++++++++++++++++++++++++++++++++++
>>>>    target/i386/kvm/tdx.c | 24 +++++++++++++++++-
>>>>    3 files changed, 110 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/qapi/run-state.json b/qapi/run-state.json
>>>> index dd0770b379e5..b71dd1884eb6 100644
>>>> --- a/qapi/run-state.json
>>>> +++ b/qapi/run-state.json
>>>> @@ -483,10 +483,12 @@
>>>>   #
>>>>   # @s390: s390 guest panic information type (Since: 2.12)
>>>>   #
>>>> +# @tdx: tdx guest panic information type (Since: 9.0)
>>>> +#
>>>>   # Since: 2.9
>>>>   ##
>>>>   { 'enum': 'GuestPanicInformationType',
>>>> -  'data': [ 'hyper-v', 's390' ] }
>>>> +  'data': [ 'hyper-v', 's390', 'tdx' ] }
>>>>      ##
>>>> # @GuestPanicInformation:
>>>> @@ -501,7 +503,8 @@
>>>>     'base': {'type': 'GuestPanicInformationType'},
>>>>     'discriminator': 'type',
>>>>     'data': {'hyper-v': 'GuestPanicInformationHyperV',
>>>> -          's390': 'GuestPanicInformationS390'}}
>>>> +          's390': 'GuestPanicInformationS390',
>>>> +          'tdx' : 'GuestPanicInformationTdx'}}
>>>>   ##
>>>>   # @GuestPanicInformationHyperV:
>>>> @@ -564,6 +567,30 @@
>>>>              'psw-addr': 'uint64',
>>>>              'reason': 'S390CrashReason'}}
>>>> +##
>>>> +# @GuestPanicInformationTdx:
>>>> +#
>>>> +# TDX Guest panic information specific to TDX, as specified in the
>>>> +# "Guest-Hypervisor Communication Interface (GHCI) Specification",
>>>> +# section TDG.VP.VMCALL<ReportFatalError>.
>>>> +#
>>>> +# @error-code: TD-specific error code
>>>> +#
>>>> +# @message: Human-readable error message provided by the guest. Not
>>>> +#     to be trusted.
>>>> +#
>>>> +# @gpa: guest-physical address of a page that contains more verbose
>>>> +#     error information, as zero-terminated string.  Present when the
>>>> +#     "GPA valid" bit (bit 63) is set in @error-code.
>>>
>>> Uh, peeking at GHCI Spec section 3.4 TDG.VP.VMCALL<ReportFatalError>, I
>>> see operand R12 consists of
>>>
>>>       bits    name                        description
>>>       31:0    TD-specific error code      TD-specific error code
>>>                                           Panic – 0x0.
>>>                                           Values – 0x1 to 0xFFFFFFFF
>>>                                           reserved.
>>>       62:32   TD-specific extended        TD-specific extended error code.
>>>               error code                  TD software defined.
>>>       63      GPA Valid                   Set if the TD specified additional
>>>                                           information in the GPA parameter
>>>                                           (R13).
>>> Is @error-code all of R12, or just bits 31:0?
>>> If it's all of R12, description of @error-code as "TD-specific error
>>> code" is misleading.
>>
>> We pass all of R12 to @error_code.
>>
>> Here it wants to use "error_code" as generic as the whole R12. Do you have any better description of it ?
> 
> Sadly, the spec is of no help: it doesn't name the entire thing, only
> the three sub-fields TD-specific error code, TD-specific extended error
> code, GPA valid.
> 
> We could take the hint, and provide the sub-fields instead:
> 
> * @error-code contains the TD-specific error code (bits 31:0)
> 
> * @extended-error-code contains the TD-specific extended error code
>    (bits 62:32)
> 
> * we don't need @gpa-valid, because it's the same as "@gpa is present"
> 
> If we decide to keep the single member, we do need another name for it.
> @error-codes (plural) doesn't exactly feel wonderful, but it gives at
> least a subtle hint that it's not just *the* error code.

The reason we only defined one single member, is that the 
extended-error-code is not used now, and I believe it won't be used in 
the near future.

If no objection from others, I will use @error-codes (plural) in the 
next version.

>>> If it's just bits 31:0, then 'Present when the "GPA valid" bit (bit 63)
>>> is set in @error-code' is wrong.  Could go with 'Only present when the
>>> guest provides this information'.
>>>
>>>> +#
>>>> +#
>>>
>>> Drop one of these two lines, please.
>>>
>>>> +# Since: 9.0
>>>> +##
>>>> +{'struct': 'GuestPanicInformationTdx',
>>>> + 'data': {'error-code': 'uint64',
>>>> +          'message': 'str',
>>>> +          '*gpa': 'uint64'}}
>>>> +
>>>>    ##
>>>>    # @MEMORY_FAILURE:
>>>>    #
>>>
>
Markus Armbruster March 11, 2024, 7:29 a.m. UTC | #5
Xiaoyao Li <xiaoyao.li@intel.com> writes:

> On 3/7/2024 9:51 PM, Markus Armbruster wrote:
>> Xiaoyao Li <xiaoyao.li@intel.com> writes:
>> 
>>> On 2/29/2024 4:51 PM, Markus Armbruster wrote:
>>>> Xiaoyao Li <xiaoyao.li@intel.com> writes:
>>>>
>>>>> Integrate TDX's TDX_REPORT_FATAL_ERROR into QEMU GuestPanic facility
>>>>>
>>>>> Originated-from: Isaku Yamahata <isaku.yamahata@intel.com>
>>>>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>>>> ---
>>>>> Changes in v5:
>>>>> - mention additional error information in gpa when it presents;
>>>>> - refine the documentation; (Markus)
>>>>>
>>>>> Changes in v4:
>>>>> - refine the documentation; (Markus)
>>>>>
>>>>> Changes in v3:
>>>>> - Add docmentation of new type and struct; (Daniel)
>>>>> - refine the error message handling; (Daniel)
>>>>> ---
>>>>>    qapi/run-state.json   | 31 +++++++++++++++++++++--
>>>>>    system/runstate.c     | 58 +++++++++++++++++++++++++++++++++++++++++++
>>>>>    target/i386/kvm/tdx.c | 24 +++++++++++++++++-
>>>>>    3 files changed, 110 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/qapi/run-state.json b/qapi/run-state.json
>>>>> index dd0770b379e5..b71dd1884eb6 100644
>>>>> --- a/qapi/run-state.json
>>>>> +++ b/qapi/run-state.json

[...]

>>>>> @@ -564,6 +567,30 @@
>>>>>              'psw-addr': 'uint64',
>>>>>              'reason': 'S390CrashReason'}}
>>>>> +##
>>>>> +# @GuestPanicInformationTdx:
>>>>> +#
>>>>> +# TDX Guest panic information specific to TDX, as specified in the
>>>>> +# "Guest-Hypervisor Communication Interface (GHCI) Specification",
>>>>> +# section TDG.VP.VMCALL<ReportFatalError>.
>>>>> +#
>>>>> +# @error-code: TD-specific error code
>>>>> +#
>>>>> +# @message: Human-readable error message provided by the guest. Not
>>>>> +#     to be trusted.
>>>>> +#
>>>>> +# @gpa: guest-physical address of a page that contains more verbose
>>>>> +#     error information, as zero-terminated string.  Present when the
>>>>> +#     "GPA valid" bit (bit 63) is set in @error-code.
>>>>
>>>> Uh, peeking at GHCI Spec section 3.4 TDG.VP.VMCALL<ReportFatalError>, I
>>>> see operand R12 consists of
>>>>
>>>>       bits    name                        description
>>>>       31:0    TD-specific error code      TD-specific error code
>>>>                                           Panic – 0x0.
>>>>                                           Values – 0x1 to 0xFFFFFFFF
>>>>                                           reserved.
>>>>       62:32   TD-specific extended        TD-specific extended error code.
>>>>               error code                  TD software defined.
>>>>       63      GPA Valid                   Set if the TD specified additional
>>>>                                           information in the GPA parameter
>>>>                                           (R13).
>>>> Is @error-code all of R12, or just bits 31:0?
>>>> If it's all of R12, description of @error-code as "TD-specific error
>>>> code" is misleading.
>>>
>>> We pass all of R12 to @error_code.
>>>
>>> Here it wants to use "error_code" as generic as the whole R12. Do you have any better description of it ?
>> 
>> Sadly, the spec is of no help: it doesn't name the entire thing, only
>> the three sub-fields TD-specific error code, TD-specific extended error
>> code, GPA valid.
>> 
>> We could take the hint, and provide the sub-fields instead:
>> 
>> * @error-code contains the TD-specific error code (bits 31:0)
>> 
>> * @extended-error-code contains the TD-specific extended error code
>>    (bits 62:32)
>> 
>> * we don't need @gpa-valid, because it's the same as "@gpa is present"
>> 
>> If we decide to keep the single member, we do need another name for it.
>> @error-codes (plural) doesn't exactly feel wonderful, but it gives at
>> least a subtle hint that it's not just *the* error code.
>
> The reason we only defined one single member, is that the 
> extended-error-code is not used now, and I believe it won't be used in 
> the near future.

Aha!  Then I recommend

* @error-code contains the TD-specific error code (bits 31:0)

* Omit bits 62:32 from the reply; if we later find an actual use for
  them, we can add a suitable member

* Omit bit 63, because it's the same as "@gpa is present"

> If no objection from others, I will use @error-codes (plural) in the 
> next version.

I recommend to keep the @error-code name, but narrow its value to the
actual error code, i.e. bits 31:0.

>>>> If it's just bits 31:0, then 'Present when the "GPA valid" bit (bit 63)
>>>> is set in @error-code' is wrong.  Could go with 'Only present when the
>>>> guest provides this information'.

[...]
Xiaoyao Li March 12, 2024, 7:27 a.m. UTC | #6
On 3/11/2024 3:29 PM, Markus Armbruster wrote:
> Xiaoyao Li <xiaoyao.li@intel.com> writes:
> 
>> On 3/7/2024 9:51 PM, Markus Armbruster wrote:
>>> Xiaoyao Li <xiaoyao.li@intel.com> writes:
>>>
>>>> On 2/29/2024 4:51 PM, Markus Armbruster wrote:
>>>>> Xiaoyao Li <xiaoyao.li@intel.com> writes:
>>>>>
>>>>>> Integrate TDX's TDX_REPORT_FATAL_ERROR into QEMU GuestPanic facility
>>>>>>
>>>>>> Originated-from: Isaku Yamahata <isaku.yamahata@intel.com>
>>>>>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>>>>> ---
>>>>>> Changes in v5:
>>>>>> - mention additional error information in gpa when it presents;
>>>>>> - refine the documentation; (Markus)
>>>>>>
>>>>>> Changes in v4:
>>>>>> - refine the documentation; (Markus)
>>>>>>
>>>>>> Changes in v3:
>>>>>> - Add docmentation of new type and struct; (Daniel)
>>>>>> - refine the error message handling; (Daniel)
>>>>>> ---
>>>>>>     qapi/run-state.json   | 31 +++++++++++++++++++++--
>>>>>>     system/runstate.c     | 58 +++++++++++++++++++++++++++++++++++++++++++
>>>>>>     target/i386/kvm/tdx.c | 24 +++++++++++++++++-
>>>>>>     3 files changed, 110 insertions(+), 3 deletions(-)
>>>>>>
>>>>>> diff --git a/qapi/run-state.json b/qapi/run-state.json
>>>>>> index dd0770b379e5..b71dd1884eb6 100644
>>>>>> --- a/qapi/run-state.json
>>>>>> +++ b/qapi/run-state.json
> 
> [...]
> 
>>>>>> @@ -564,6 +567,30 @@
>>>>>>               'psw-addr': 'uint64',
>>>>>>               'reason': 'S390CrashReason'}}
>>>>>> +##
>>>>>> +# @GuestPanicInformationTdx:
>>>>>> +#
>>>>>> +# TDX Guest panic information specific to TDX, as specified in the
>>>>>> +# "Guest-Hypervisor Communication Interface (GHCI) Specification",
>>>>>> +# section TDG.VP.VMCALL<ReportFatalError>.
>>>>>> +#
>>>>>> +# @error-code: TD-specific error code
>>>>>> +#
>>>>>> +# @message: Human-readable error message provided by the guest. Not
>>>>>> +#     to be trusted.
>>>>>> +#
>>>>>> +# @gpa: guest-physical address of a page that contains more verbose
>>>>>> +#     error information, as zero-terminated string.  Present when the
>>>>>> +#     "GPA valid" bit (bit 63) is set in @error-code.
>>>>>
>>>>> Uh, peeking at GHCI Spec section 3.4 TDG.VP.VMCALL<ReportFatalError>, I
>>>>> see operand R12 consists of
>>>>>
>>>>>        bits    name                        description
>>>>>        31:0    TD-specific error code      TD-specific error code
>>>>>                                            Panic – 0x0.
>>>>>                                            Values – 0x1 to 0xFFFFFFFF
>>>>>                                            reserved.
>>>>>        62:32   TD-specific extended        TD-specific extended error code.
>>>>>                error code                  TD software defined.
>>>>>        63      GPA Valid                   Set if the TD specified additional
>>>>>                                            information in the GPA parameter
>>>>>                                            (R13).
>>>>> Is @error-code all of R12, or just bits 31:0?
>>>>> If it's all of R12, description of @error-code as "TD-specific error
>>>>> code" is misleading.
>>>>
>>>> We pass all of R12 to @error_code.
>>>>
>>>> Here it wants to use "error_code" as generic as the whole R12. Do you have any better description of it ?
>>>
>>> Sadly, the spec is of no help: it doesn't name the entire thing, only
>>> the three sub-fields TD-specific error code, TD-specific extended error
>>> code, GPA valid.
>>>
>>> We could take the hint, and provide the sub-fields instead:
>>>
>>> * @error-code contains the TD-specific error code (bits 31:0)
>>>
>>> * @extended-error-code contains the TD-specific extended error code
>>>     (bits 62:32)
>>>
>>> * we don't need @gpa-valid, because it's the same as "@gpa is present"
>>>
>>> If we decide to keep the single member, we do need another name for it.
>>> @error-codes (plural) doesn't exactly feel wonderful, but it gives at
>>> least a subtle hint that it's not just *the* error code.
>>
>> The reason we only defined one single member, is that the
>> extended-error-code is not used now, and I believe it won't be used in
>> the near future.
> 
> Aha!  Then I recommend
> 
> * @error-code contains the TD-specific error code (bits 31:0)
> 
> * Omit bits 62:32 from the reply; if we later find an actual use for
>    them, we can add a suitable member
> 
> * Omit bit 63, because it's the same as "@gpa is present"
> 
>> If no objection from others, I will use @error-codes (plural) in the
>> next version.
> 
> I recommend to keep the @error-code name, but narrow its value to the
> actual error code, i.e. bits 31:0.

It works for me. I will got this direction in the next version.

>>>>> If it's just bits 31:0, then 'Present when the "GPA valid" bit (bit 63)
>>>>> is set in @error-code' is wrong.  Could go with 'Only present when the
>>>>> guest provides this information'.
> 
> [...]
>
diff mbox series

Patch

diff --git a/qapi/run-state.json b/qapi/run-state.json
index dd0770b379e5..b71dd1884eb6 100644
--- a/qapi/run-state.json
+++ b/qapi/run-state.json
@@ -483,10 +483,12 @@ 
 #
 # @s390: s390 guest panic information type (Since: 2.12)
 #
+# @tdx: tdx guest panic information type (Since: 9.0)
+#
 # Since: 2.9
 ##
 { 'enum': 'GuestPanicInformationType',
-  'data': [ 'hyper-v', 's390' ] }
+  'data': [ 'hyper-v', 's390', 'tdx' ] }
 
 ##
 # @GuestPanicInformation:
@@ -501,7 +503,8 @@ 
  'base': {'type': 'GuestPanicInformationType'},
  'discriminator': 'type',
  'data': {'hyper-v': 'GuestPanicInformationHyperV',
-          's390': 'GuestPanicInformationS390'}}
+          's390': 'GuestPanicInformationS390',
+          'tdx' : 'GuestPanicInformationTdx'}}
 
 ##
 # @GuestPanicInformationHyperV:
@@ -564,6 +567,30 @@ 
           'psw-addr': 'uint64',
           'reason': 'S390CrashReason'}}
 
+##
+# @GuestPanicInformationTdx:
+#
+# TDX Guest panic information specific to TDX, as specified in the
+# "Guest-Hypervisor Communication Interface (GHCI) Specification",
+# section TDG.VP.VMCALL<ReportFatalError>.
+#
+# @error-code: TD-specific error code
+#
+# @message: Human-readable error message provided by the guest. Not
+#     to be trusted.
+#
+# @gpa: guest-physical address of a page that contains more verbose
+#     error information, as zero-terminated string.  Present when the
+#     "GPA valid" bit (bit 63) is set in @error-code.
+#
+#
+# Since: 9.0
+##
+{'struct': 'GuestPanicInformationTdx',
+ 'data': {'error-code': 'uint64',
+          'message': 'str',
+          '*gpa': 'uint64'}}
+
 ##
 # @MEMORY_FAILURE:
 #
diff --git a/system/runstate.c b/system/runstate.c
index d6ab860ecaa7..3b628c38739d 100644
--- a/system/runstate.c
+++ b/system/runstate.c
@@ -519,6 +519,52 @@  static void qemu_system_wakeup(void)
     }
 }
 
+static char* tdx_parse_panic_message(char *message)
+{
+    bool printable = false;
+    char *buf = NULL;
+    int len = 0, i;
+
+    /*
+     * Although message is defined as a json string, we shouldn't
+     * unconditionally treat it as is because the guest generated it and
+     * it's not necessarily trustable.
+     */
+    if (message) {
+        /* The caller guarantees the NUL-terminated string. */
+        len = strlen(message);
+
+        printable = len > 0;
+        for (i = 0; i < len; i++) {
+            if (!(0x20 <= message[i] && message[i] <= 0x7e)) {
+                printable = false;
+                break;
+            }
+        }
+    }
+
+    if (!printable && len) {
+        /* 3 = length of "%02x " */
+        buf = g_malloc(len * 3);
+        for (i = 0; i < len; i++) {
+            if (message[i] == '\0') {
+                break;
+            } else {
+                sprintf(buf + 3 * i, "%02x ", message[i]);
+            }
+        }
+        if (i > 0)
+            /* replace the last ' '(space) to NUL */
+            buf[i * 3 - 1] = '\0';
+        else
+            buf[0] = '\0';
+
+        return buf;
+    }
+
+    return message;
+}
+
 void qemu_system_guest_panicked(GuestPanicInformation *info)
 {
     qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed");
@@ -560,7 +606,19 @@  void qemu_system_guest_panicked(GuestPanicInformation *info)
                           S390CrashReason_str(info->u.s390.reason),
                           info->u.s390.psw_mask,
                           info->u.s390.psw_addr);
+        } else if (info->type == GUEST_PANIC_INFORMATION_TYPE_TDX) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          " TDX guest reports fatal error:"
+                          " error code: 0x%#" PRIx64 " error message:\"%s\"\n",
+                          info->u.tdx.error_code,
+                          tdx_parse_panic_message(info->u.tdx.message));
+            if (info->u.tdx.error_code & (1ull << 63)) {
+                qemu_log_mask(LOG_GUEST_ERROR, "Additional error information "
+                              "can be found at gpa page: 0x%#" PRIx64 "\n",
+                              info->u.tdx.gpa);
+            }
         }
+
         qapi_free_GuestPanicInformation(info);
     }
 }
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 6cd72a26b970..811a3b81af99 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -20,6 +20,7 @@ 
 #include "qom/object_interfaces.h"
 #include "standard-headers/asm-x86/kvm_para.h"
 #include "sysemu/kvm.h"
+#include "sysemu/runstate.h"
 #include "sysemu/sysemu.h"
 #include "exec/ramblock.h"
 
@@ -1092,11 +1093,26 @@  static int tdx_handle_get_quote(X86CPU *cpu, struct kvm_tdx_vmcall *vmcall)
     return 0;
 }
 
+static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code,
+                                        char *message, uint64_t gpa)
+{
+    GuestPanicInformation *panic_info;
+
+    panic_info = g_new0(GuestPanicInformation, 1);
+    panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX;
+    panic_info->u.tdx.error_code = error_code;
+    panic_info->u.tdx.message = message;
+    panic_info->u.tdx.gpa = gpa;
+
+    qemu_system_guest_panicked(panic_info);
+}
+
 static int tdx_handle_report_fatal_error(X86CPU *cpu,
                                          struct kvm_tdx_vmcall *vmcall)
 {
     uint64_t error_code = vmcall->in_r12;
     char *message = NULL;
+    uint64_t gpa = -1ull;
 
     if (error_code & 0xffff) {
         error_report("TDX: REPORT_FATAL_ERROR: invalid error code: "
@@ -1125,7 +1141,13 @@  static int tdx_handle_report_fatal_error(X86CPU *cpu,
         assert((char *)tmp == message + GUEST_PANIC_INFO_TDX_MESSAGE_MAX);
     }
 
-    error_report("TD guest reports fatal error. %s\n", message ? : "");
+#define TDX_REPORT_FATAL_ERROR_GPA_VALID    BIT_ULL(63)
+    if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) {
+        gpa = vmcall->in_r13;
+    }
+
+    tdx_panicked_on_fatal_error(cpu, error_code, message, gpa);
+
     return -1;
 }