diff mbox series

[v3,52/70] i386/tdx: handle TDG.VP.VMCALL<GetQuote>

Message ID 20231115071519.2864957-53-xiaoyao.li@intel.com
State New
Headers show
Series QEMU Guest memfd + QEMU TDX support | expand

Commit Message

Xiaoyao Li Nov. 15, 2023, 7:15 a.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

For GetQuote, delegate a request to Quote Generation Service.
Add property "quote-generation-socket" to tdx-guest, whihc is a property
of type SocketAddress to specify Quote Generation Service(QGS).

On request, connect to the QGS, read request buffer from shared guest
memory, send the request buffer to the server and store the response
into shared guest memory and notify TD guest by interrupt.

command line example:
  qemu-system-x86_64 \
    -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"2","port":"1234"}}' \
    -machine confidential-guest-support=tdx0

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Codeveloped-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
Changes in v3:
- rename property "quote-generation-service" to "quote-generation-socket";
- change the type of "quote-generation-socket" from str to
  SocketAddress;
- squash next patch into this one;
---
 qapi/qom.json         |   5 +-
 target/i386/kvm/tdx.c | 430 ++++++++++++++++++++++++++++++++++++++++++
 target/i386/kvm/tdx.h |   6 +
 3 files changed, 440 insertions(+), 1 deletion(-)

Comments

Daniel P. Berrangé Nov. 15, 2023, 5:51 p.m. UTC | #1
On Wed, Nov 15, 2023 at 02:15:01AM -0500, Xiaoyao Li wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> For GetQuote, delegate a request to Quote Generation Service.
> Add property "quote-generation-socket" to tdx-guest, whihc is a property
> of type SocketAddress to specify Quote Generation Service(QGS).
> 
> On request, connect to the QGS, read request buffer from shared guest
> memory, send the request buffer to the server and store the response
> into shared guest memory and notify TD guest by interrupt.
> 
> command line example:
>   qemu-system-x86_64 \
>     -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"2","port":"1234"}}' \
>     -machine confidential-guest-support=tdx0
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Codeveloped-by: Chenyi Qiang <chenyi.qiang@intel.com>
> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
> Changes in v3:
> - rename property "quote-generation-service" to "quote-generation-socket";
> - change the type of "quote-generation-socket" from str to
>   SocketAddress;
> - squash next patch into this one;
> ---
>  qapi/qom.json         |   5 +-
>  target/i386/kvm/tdx.c | 430 ++++++++++++++++++++++++++++++++++++++++++
>  target/i386/kvm/tdx.h |   6 +
>  3 files changed, 440 insertions(+), 1 deletion(-)

> @@ -969,6 +1001,7 @@ static void tdx_guest_class_init(ObjectClass *oc, void *data)
>  {
>  }
>  
> +#define TDG_VP_VMCALL_GET_QUOTE                         0x10002ULL
>  #define TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT      0x10004ULL
>  
>  #define TDG_VP_VMCALL_SUCCESS           0x0000000000000000ULL
> @@ -977,6 +1010,400 @@ static void tdx_guest_class_init(ObjectClass *oc, void *data)
>  #define TDG_VP_VMCALL_GPA_INUSE         0x8000000000000001ULL
>  #define TDG_VP_VMCALL_ALIGN_ERROR       0x8000000000000002ULL
>  
> +#define TDX_GET_QUOTE_STRUCTURE_VERSION 1ULL
> +
> +#define TDX_VP_GET_QUOTE_SUCCESS                0ULL
> +#define TDX_VP_GET_QUOTE_IN_FLIGHT              (-1ULL)
> +#define TDX_VP_GET_QUOTE_ERROR                  0x8000000000000000ULL
> +#define TDX_VP_GET_QUOTE_QGS_UNAVAILABLE        0x8000000000000001ULL
> +
> +/* Limit to avoid resource starvation. */
> +#define TDX_GET_QUOTE_MAX_BUF_LEN       (128 * 1024)
> +#define TDX_MAX_GET_QUOTE_REQUEST       16
> +
> +/* Format of pages shared with guest. */
> +struct tdx_get_quote_header {
> +    /* Format version: must be 1 in little endian. */
> +    uint64_t structure_version;
> +
> +    /*
> +     * GetQuote status code in little endian:
> +     *   Guest must set error_code to 0 to avoid information leak.
> +     *   Qemu sets this before interrupting guest.
> +     */
> +    uint64_t error_code;
> +
> +    /*
> +     * in-message size in little endian: The message will follow this header.
> +     * The in-message will be send to QGS.
> +     */
> +    uint32_t in_len;
> +
> +    /*
> +     * out-message size in little endian:
> +     * On request, out_len must be zero to avoid information leak.
> +     * On return, message size from QGS. Qemu overwrites this field.
> +     * The message will follows this header.  The in-message is overwritten.
> +     */
> +    uint32_t out_len;
> +
> +    /*
> +     * Message buffer follows.
> +     * Guest sets message that will be send to QGS.  If out_len > in_len, guest
> +     * should zero remaining buffer to avoid information leak.
> +     * Qemu overwrites this buffer with a message returned from QGS.
> +     */
> +};
> +
> +static hwaddr tdx_shared_bit(X86CPU *cpu)
> +{
> +    return (cpu->phys_bits > 48) ? BIT_ULL(51) : BIT_ULL(47);
> +}
> +
> +struct tdx_get_quote_task {
> +    uint32_t apic_id;
> +    hwaddr gpa;
> +    uint64_t buf_len;
> +    char *out_data;
> +    uint64_t out_len;
> +    struct tdx_get_quote_header hdr;
> +    int event_notify_interrupt;
> +    QIOChannelSocket *ioc;
> +};
> +
> +struct x86_msi {
> +    union {
> +        struct {
> +            uint32_t    reserved_0              : 2,
> +                        dest_mode_logical       : 1,
> +                        redirect_hint           : 1,
> +                        reserved_1              : 1,
> +                        virt_destid_8_14        : 7,
> +                        destid_0_7              : 8,
> +                        base_address            : 12;
> +        } QEMU_PACKED x86_address_lo;
> +        uint32_t address_lo;
> +    };
> +    union {
> +        struct {
> +            uint32_t    reserved        : 8,
> +                        destid_8_31     : 24;
> +        } QEMU_PACKED x86_address_hi;
> +        uint32_t address_hi;
> +    };
> +    union {
> +        struct {
> +            uint32_t    vector                  : 8,
> +                        delivery_mode           : 3,
> +                        dest_mode_logical       : 1,
> +                        reserved                : 2,
> +                        active_low              : 1,
> +                        is_level                : 1;
> +        } QEMU_PACKED x86_data;
> +        uint32_t data;
> +    };
> +};
> +
> +static void tdx_td_notify(struct tdx_get_quote_task *t)
> +{
> +    struct x86_msi x86_msi;
> +    struct kvm_msi msi;
> +    int ret;
> +
> +    /* It is optional for host VMM to interrupt TD. */
> +    if(!(32 <= t->event_notify_interrupt && t->event_notify_interrupt <= 255))
> +        return;
> +
> +    x86_msi = (struct x86_msi) {
> +        .x86_address_lo  = {
> +            .reserved_0 = 0,
> +            .dest_mode_logical = 0,
> +            .redirect_hint = 0,
> +            .reserved_1 = 0,
> +            .virt_destid_8_14 = 0,
> +            .destid_0_7 = t->apic_id & 0xff,
> +        },
> +        .x86_address_hi = {
> +            .reserved = 0,
> +            .destid_8_31 = t->apic_id >> 8,
> +        },
> +        .x86_data = {
> +            .vector = t->event_notify_interrupt,
> +            .delivery_mode = APIC_DM_FIXED,
> +            .dest_mode_logical = 0,
> +            .reserved = 0,
> +            .active_low = 0,
> +            .is_level = 0,
> +        },
> +    };
> +    msi = (struct kvm_msi) {
> +        .address_lo = x86_msi.address_lo,
> +        .address_hi = x86_msi.address_hi,
> +        .data = x86_msi.data,
> +        .flags = 0,
> +        .devid = 0,
> +    };
> +    ret = kvm_vm_ioctl(kvm_state, KVM_SIGNAL_MSI, &msi);
> +    if (ret < 0) {
> +        /* In this case, no better way to tell it to guest.  Log it. */
> +        error_report("TDX: injection %d failed, interrupt lost (%s).\n",
> +                     t->event_notify_interrupt, strerror(-ret));
> +    }
> +}
> +
> +static void tdx_get_quote_read(void *opaque)
> +{
> +    struct tdx_get_quote_task *t = opaque;
> +    ssize_t size = 0;
> +    Error *err = NULL;

This error is set, but never read and more importantly
never freed.  If you're not going to use it just pass
NULL to the methods, otherwise use error_report_err to
print and free it.

> +    MachineState *ms;
> +    TdxGuest *tdx;
> +
> +    while (true) {
> +        char *buf;
> +        size_t buf_size;
> +
> +        if (t->out_len < t->buf_len) {
> +            buf = t->out_data + t->out_len;
> +            buf_size = t->buf_len - t->out_len;
> +        } else {
> +            /*
> +             * The received data is too large to fit in the shared GPA.
> +             * Discard the received data and try to know the data size.
> +             */
> +            buf = t->out_data;
> +            buf_size = t->buf_len;
> +        }
> +
> +        size = qio_channel_read(QIO_CHANNEL(t->ioc), buf, buf_size, &err);
> +        if (!size) {
> +            break;
> +        }
> +
> +        if (size < 0) {
> +            if (size == QIO_CHANNEL_ERR_BLOCK) {
> +                return;
> +            } else {
> +                break;
> +            }
> +        }
> +        t->out_len += size;
> +    }
> +    /*
> +     * If partial read successfully but return error at last, also treat it
> +     * as failure.
> +     */
> +    if (size < 0) {
> +        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
> +        goto error;
> +    }
> +    if (t->out_len > 0 && t->out_len > t->buf_len) {
> +        /*
> +         * There is no specific error code defined for this case(E2BIG) at the
> +         * moment.
> +         * TODO: Once an error code for this case is defined in GHCI spec ,
> +         * update the error code.
> +         */
> +        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_ERROR);
> +        t->hdr.out_len = cpu_to_le32(t->out_len);
> +        goto error_hdr;
> +    }
> +
> +    if (address_space_write(
> +            &address_space_memory, t->gpa + sizeof(t->hdr),
> +            MEMTXATTRS_UNSPECIFIED, t->out_data, t->out_len) != MEMTX_OK) {
> +        goto error;
> +    }
> +    /*
> +     * Even if out_len == 0, it's a success.  It's up to the QGS-client contract
> +     * how to interpret the zero-sized message as return message.
> +     */
> +    t->hdr.out_len = cpu_to_le32(t->out_len);
> +    t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_SUCCESS);
> +
> +error:
> +    if (t->hdr.error_code != cpu_to_le64(TDX_VP_GET_QUOTE_SUCCESS)) {
> +        t->hdr.out_len = cpu_to_le32(0);
> +    }
> +error_hdr:
> +    if (address_space_write(
> +            &address_space_memory, t->gpa,
> +            MEMTXATTRS_UNSPECIFIED, &t->hdr, sizeof(t->hdr)) != MEMTX_OK) {
> +        error_report("TDX: failed to update GetQuote header.");
> +    }
> +    tdx_td_notify(t);
> +
> +    qemu_set_fd_handler(t->ioc->fd, NULL, NULL, NULL);
> +    qio_channel_close(QIO_CHANNEL(t->ioc), &err);

Likely overwriting a previously set 'err'

> +    object_unref(OBJECT(t->ioc));
> +    g_free(t->out_data);
> +    g_free(t);
> +
> +    /* Maintain the number of in-flight requests. */
> +    ms = MACHINE(qdev_get_machine());
> +    tdx = TDX_GUEST(ms->cgs);
> +    qemu_mutex_lock(&tdx->lock);
> +    tdx->quote_generation_num--;
> +    qemu_mutex_unlock(&tdx->lock);
> +}
> +
> +/*
> + * TODO: If QGS doesn't reply for long time, make it an error and interrupt
> + * guest.
> + */
> +static void tdx_handle_get_quote_connected(QIOTask *task, gpointer opaque)
> +{
> +    struct tdx_get_quote_task *t = opaque;
> +    Error *err = NULL;

Same leak problem in this method

> +    char *in_data = NULL;

g_autofree for simpler cleanup

> +    MachineState *ms;
> +    TdxGuest *tdx;
> +
> +    t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_ERROR);
> +    if (qio_task_propagate_error(task, NULL)) {
> +        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
> +        goto error;
> +    }
> +
> +    in_data = g_malloc(le32_to_cpu(t->hdr.in_len));

IF  't->hdr.in_len' is going from the guest then they needs
bounds checking, otherwise its a trivial denial of service
to make QEMU allocate all of host RAM.

> +    if (!in_data) {
> +        goto error;
> +    }
> +
> +    if (address_space_read(&address_space_memory, t->gpa + sizeof(t->hdr),
> +                           MEMTXATTRS_UNSPECIFIED, in_data,
> +                           le32_to_cpu(t->hdr.in_len)) != MEMTX_OK) {
> +        goto error;
> +    }
> +
> +    qio_channel_set_blocking(QIO_CHANNEL(t->ioc), false, NULL);
> +
> +    if (qio_channel_write_all(QIO_CHANNEL(t->ioc), in_data,
> +                              le32_to_cpu(t->hdr.in_len), &err) ||
> +        err) {
> +        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
> +        goto error;
> +    }
> +
> +    g_free(in_data);
> +    qemu_set_fd_handler(t->ioc->fd, tdx_get_quote_read, NULL, t);

Dn't use  qemu_set_fd_handler() with QIOChannel objects.
qio_channel_add_watch() is the API for dealing with event
callbacks

> +
> +    return;
> +error:
> +    t->hdr.out_len = cpu_to_le32(0);
> +
> +    if (address_space_write(
> +            &address_space_memory, t->gpa,
> +            MEMTXATTRS_UNSPECIFIED, &t->hdr, sizeof(t->hdr)) != MEMTX_OK) {
> +        error_report("TDX: failed to update GetQuote header.\n");
> +    }
> +    tdx_td_notify(t);
> +
> +    qio_channel_close(QIO_CHANNEL(t->ioc), &err);
> +    object_unref(OBJECT(t->ioc));
> +    g_free(t);
> +    g_free(in_data);
> +
> +    /* Maintain the number of in-flight requests. */
> +    ms = MACHINE(qdev_get_machine());
> +    tdx = TDX_GUEST(ms->cgs);
> +    qemu_mutex_lock(&tdx->lock);
> +    tdx->quote_generation_num--;
> +    qemu_mutex_unlock(&tdx->lock);
> +    return;
> +}
> +
> +static void tdx_handle_get_quote(X86CPU *cpu, struct kvm_tdx_vmcall *vmcall)
> +{
> +    hwaddr gpa = vmcall->in_r12;
> +    uint64_t buf_len = vmcall->in_r13;
> +    struct tdx_get_quote_header hdr;
> +    MachineState *ms;
> +    TdxGuest *tdx;
> +    QIOChannelSocket *ioc;
> +    struct tdx_get_quote_task *t;
> +
> +    vmcall->status_code = TDG_VP_VMCALL_INVALID_OPERAND;
> +
> +    /* GPA must be shared. */
> +    if (!(gpa & tdx_shared_bit(cpu))) {
> +        return;
> +    }
> +    gpa &= ~tdx_shared_bit(cpu);
> +
> +    if (!QEMU_IS_ALIGNED(gpa, 4096) || !QEMU_IS_ALIGNED(buf_len, 4096)) {
> +        vmcall->status_code = TDG_VP_VMCALL_ALIGN_ERROR;
> +        return;
> +    }
> +    if (buf_len == 0) {
> +        return;
> +    }
> +
> +    if (address_space_read(&address_space_memory, gpa, MEMTXATTRS_UNSPECIFIED,
> +                           &hdr, sizeof(hdr)) != MEMTX_OK) {
> +        return;
> +    }
> +    if (le64_to_cpu(hdr.structure_version) != TDX_GET_QUOTE_STRUCTURE_VERSION) {
> +        return;
> +    }
> +    /*
> +     * Paranoid: Guest should clear error_code and out_len to avoid information
> +     * leak.  Enforce it.  The initial value of them doesn't matter for qemu to
> +     * process the request.
> +     */
> +    if (le64_to_cpu(hdr.error_code) != TDX_VP_GET_QUOTE_SUCCESS ||
> +        le32_to_cpu(hdr.out_len) != 0) {
> +        return;
> +    }
> +
> +    /* Only safe-guard check to avoid too large buffer size. */
> +    if (buf_len > TDX_GET_QUOTE_MAX_BUF_LEN ||
> +        le32_to_cpu(hdr.in_len) > TDX_GET_QUOTE_MAX_BUF_LEN ||
> +        le32_to_cpu(hdr.in_len) > buf_len) {
> +        return;
> +    }
> +
> +    /* Mark the buffer in-flight. */
> +    hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_IN_FLIGHT);
> +    if (address_space_write(&address_space_memory, gpa, MEMTXATTRS_UNSPECIFIED,
> +                            &hdr, sizeof(hdr)) != MEMTX_OK) {
> +        return;
> +    }
> +
> +    ms = MACHINE(qdev_get_machine());
> +    tdx = TDX_GUEST(ms->cgs);
> +    ioc = qio_channel_socket_new();
> +
> +    t = g_malloc(sizeof(*t));
> +    t->apic_id = tdx->event_notify_apic_id;
> +    t->gpa = gpa;
> +    t->buf_len = buf_len;
> +    t->out_data = g_malloc(t->buf_len);
> +    t->out_len = 0;
> +    t->hdr = hdr;
> +    t->ioc = ioc;
> +
> +    qemu_mutex_lock(&tdx->lock);
> +    if (!tdx->quote_generation ||
> +        /* Prevent too many in-flight get-quote request. */
> +        tdx->quote_generation_num >= TDX_MAX_GET_QUOTE_REQUEST) {
> +        qemu_mutex_unlock(&tdx->lock);
> +        vmcall->status_code = TDG_VP_VMCALL_RETRY;
> +        object_unref(OBJECT(ioc));
> +        g_free(t->out_data);
> +        g_free(t);
> +        return;
> +    }
> +    tdx->quote_generation_num++;
> +    t->event_notify_interrupt = tdx->event_notify_interrupt;
> +    qio_channel_socket_connect_async(
> +        ioc, tdx->quote_generation, tdx_handle_get_quote_connected, t, NULL,
> +        NULL);
> +    qemu_mutex_unlock(&tdx->lock);
> +
> +    vmcall->status_code = TDG_VP_VMCALL_SUCCESS;
> +}
> +
>  static void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu,
>                                                      struct kvm_tdx_vmcall *vmcall)
>  {
> @@ -1005,6 +1432,9 @@ static void tdx_handle_vmcall(X86CPU *cpu, struct kvm_tdx_vmcall *vmcall)
>      }
>  
>      switch (vmcall->subfunction) {
> +    case TDG_VP_VMCALL_GET_QUOTE:
> +        tdx_handle_get_quote(cpu, vmcall);
> +        break;
>      case TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT:
>          tdx_handle_setup_event_notify_interrupt(cpu, vmcall);
>          break;
> diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
> index 4a8d67cc9fdb..4a989805493e 100644
> --- a/target/i386/kvm/tdx.h
> +++ b/target/i386/kvm/tdx.h
> @@ -5,8 +5,10 @@
>  #include CONFIG_DEVICES /* CONFIG_TDX */
>  #endif
>  
> +#include <linux/kvm.h>
>  #include "exec/confidential-guest-support.h"
>  #include "hw/i386/tdvf.h"
> +#include "io/channel-socket.h"
>  #include "sysemu/kvm.h"
>  
>  #define TYPE_TDX_GUEST "tdx-guest"
> @@ -47,6 +49,10 @@ typedef struct TdxGuest {
>      /* runtime state */
>      int event_notify_interrupt;
>      uint32_t event_notify_apic_id;
> +
> +    /* GetQuote */
> +    int quote_generation_num;
> +    SocketAddress *quote_generation;
>  } TdxGuest;

IMHO all the quote generation logic would benefit from being split
out into a completely separate self contained files

eg 'tdx-quote-generation.{c,h}'

this should define an object "TdxQuoteGenerator" which  holds these
two quote_generation_num and quote_generation  fields, and exposes
a high level API for each command taking inputs & outputs,
and doing serialization to/from the socket.  This API should do
verification of all command inputs eg the length field to prevent
guest denial of service.

The tdx_handle_get_quote() method could then call into this API.

This will give us clean separation between interaction with guest
memory, and interaction with the socket.

With regards,
Daniel
Daniel P. Berrangé Nov. 15, 2023, 5:58 p.m. UTC | #2
On Wed, Nov 15, 2023 at 02:15:01AM -0500, Xiaoyao Li wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> For GetQuote, delegate a request to Quote Generation Service.
> Add property "quote-generation-socket" to tdx-guest, whihc is a property
> of type SocketAddress to specify Quote Generation Service(QGS).
> 
> On request, connect to the QGS, read request buffer from shared guest
> memory, send the request buffer to the server and store the response
> into shared guest memory and notify TD guest by interrupt.
> 
> command line example:
>   qemu-system-x86_64 \
>     -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"2","port":"1234"}}' \
>     -machine confidential-guest-support=tdx0
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Codeveloped-by: Chenyi Qiang <chenyi.qiang@intel.com>
> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
> Changes in v3:
> - rename property "quote-generation-service" to "quote-generation-socket";
> - change the type of "quote-generation-socket" from str to
>   SocketAddress;
> - squash next patch into this one;
> ---
>  qapi/qom.json         |   5 +-
>  target/i386/kvm/tdx.c | 430 ++++++++++++++++++++++++++++++++++++++++++
>  target/i386/kvm/tdx.h |   6 +
>  3 files changed, 440 insertions(+), 1 deletion(-)
> 
> +static void tdx_handle_get_quote_connected(QIOTask *task, gpointer opaque)
> +{
> +    struct tdx_get_quote_task *t = opaque;
> +    Error *err = NULL;
> +    char *in_data = NULL;
> +    MachineState *ms;
> +    TdxGuest *tdx;
> +
> +    t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_ERROR);
> +    if (qio_task_propagate_error(task, NULL)) {
> +        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
> +        goto error;
> +    }
> +
> +    in_data = g_malloc(le32_to_cpu(t->hdr.in_len));
> +    if (!in_data) {
> +        goto error;
> +    }
> +
> +    if (address_space_read(&address_space_memory, t->gpa + sizeof(t->hdr),
> +                           MEMTXATTRS_UNSPECIFIED, in_data,
> +                           le32_to_cpu(t->hdr.in_len)) != MEMTX_OK) {
> +        goto error;
> +    }
> +
> +    qio_channel_set_blocking(QIO_CHANNEL(t->ioc), false, NULL);

You've set the channel to non-blocking, but....

> +
> +    if (qio_channel_write_all(QIO_CHANNEL(t->ioc), in_data,
> +                              le32_to_cpu(t->hdr.in_len), &err) ||
> +        err) {

...this method will block execution of this thread, by either
sleeping in poll() or doing a coroutine yield.

I don't think this is in coroutine context, so presumably this
is just blocking.  So what was the point in marking the channel
non-blocking ?

You are setting up a background watch to wait for the reply
so we don't block this thread, so you seem to want non-blocking
behaviour.

Given this, you should not be using qio_channel_write_all()
most likely. I think you need to be using qio_channel_add_watch
to get notified when it is *writable*, to send 'in_data'
incrementally & non-blocking. When that is finished then create
another watch to wait for the reply.


> +        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
> +        goto error;
> +    }
> +
> +    g_free(in_data);
> +    qemu_set_fd_handler(t->ioc->fd, tdx_get_quote_read, NULL, t);
> +
> +    return;
> +error:
> +    t->hdr.out_len = cpu_to_le32(0);
> +
> +    if (address_space_write(
> +            &address_space_memory, t->gpa,
> +            MEMTXATTRS_UNSPECIFIED, &t->hdr, sizeof(t->hdr)) != MEMTX_OK) {
> +        error_report("TDX: failed to update GetQuote header.\n");
> +    }
> +    tdx_td_notify(t);
> +
> +    qio_channel_close(QIO_CHANNEL(t->ioc), &err);
> +    object_unref(OBJECT(t->ioc));
> +    g_free(t);
> +    g_free(in_data);
> +
> +    /* Maintain the number of in-flight requests. */
> +    ms = MACHINE(qdev_get_machine());
> +    tdx = TDX_GUEST(ms->cgs);
> +    qemu_mutex_lock(&tdx->lock);
> +    tdx->quote_generation_num--;
> +    qemu_mutex_unlock(&tdx->lock);
> +    return;
> +}
> +
> +static void tdx_handle_get_quote(X86CPU *cpu, struct kvm_tdx_vmcall *vmcall)
> +{
> +    hwaddr gpa = vmcall->in_r12;
> +    uint64_t buf_len = vmcall->in_r13;
> +    struct tdx_get_quote_header hdr;
> +    MachineState *ms;
> +    TdxGuest *tdx;
> +    QIOChannelSocket *ioc;
> +    struct tdx_get_quote_task *t;
> +
> +    vmcall->status_code = TDG_VP_VMCALL_INVALID_OPERAND;
> +
> +    /* GPA must be shared. */
> +    if (!(gpa & tdx_shared_bit(cpu))) {
> +        return;
> +    }
> +    gpa &= ~tdx_shared_bit(cpu);
> +
> +    if (!QEMU_IS_ALIGNED(gpa, 4096) || !QEMU_IS_ALIGNED(buf_len, 4096)) {
> +        vmcall->status_code = TDG_VP_VMCALL_ALIGN_ERROR;
> +        return;
> +    }
> +    if (buf_len == 0) {
> +        return;
> +    }
> +
> +    if (address_space_read(&address_space_memory, gpa, MEMTXATTRS_UNSPECIFIED,
> +                           &hdr, sizeof(hdr)) != MEMTX_OK) {
> +        return;
> +    }
> +    if (le64_to_cpu(hdr.structure_version) != TDX_GET_QUOTE_STRUCTURE_VERSION) {
> +        return;
> +    }
> +    /*
> +     * Paranoid: Guest should clear error_code and out_len to avoid information
> +     * leak.  Enforce it.  The initial value of them doesn't matter for qemu to
> +     * process the request.
> +     */
> +    if (le64_to_cpu(hdr.error_code) != TDX_VP_GET_QUOTE_SUCCESS ||
> +        le32_to_cpu(hdr.out_len) != 0) {
> +        return;
> +    }
> +
> +    /* Only safe-guard check to avoid too large buffer size. */
> +    if (buf_len > TDX_GET_QUOTE_MAX_BUF_LEN ||
> +        le32_to_cpu(hdr.in_len) > TDX_GET_QUOTE_MAX_BUF_LEN ||
> +        le32_to_cpu(hdr.in_len) > buf_len) {
> +        return;
> +    }
> +
> +    /* Mark the buffer in-flight. */
> +    hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_IN_FLIGHT);
> +    if (address_space_write(&address_space_memory, gpa, MEMTXATTRS_UNSPECIFIED,
> +                            &hdr, sizeof(hdr)) != MEMTX_OK) {
> +        return;
> +    }
> +
> +    ms = MACHINE(qdev_get_machine());
> +    tdx = TDX_GUEST(ms->cgs);
> +    ioc = qio_channel_socket_new();
> +
> +    t = g_malloc(sizeof(*t));
> +    t->apic_id = tdx->event_notify_apic_id;
> +    t->gpa = gpa;
> +    t->buf_len = buf_len;
> +    t->out_data = g_malloc(t->buf_len);
> +    t->out_len = 0;
> +    t->hdr = hdr;
> +    t->ioc = ioc;
> +
> +    qemu_mutex_lock(&tdx->lock);
> +    if (!tdx->quote_generation ||
> +        /* Prevent too many in-flight get-quote request. */
> +        tdx->quote_generation_num >= TDX_MAX_GET_QUOTE_REQUEST) {
> +        qemu_mutex_unlock(&tdx->lock);
> +        vmcall->status_code = TDG_VP_VMCALL_RETRY;
> +        object_unref(OBJECT(ioc));
> +        g_free(t->out_data);
> +        g_free(t);
> +        return;
> +    }
> +    tdx->quote_generation_num++;
> +    t->event_notify_interrupt = tdx->event_notify_interrupt;
> +    qio_channel_socket_connect_async(
> +        ioc, tdx->quote_generation, tdx_handle_get_quote_connected, t, NULL,
> +        NULL);
> +    qemu_mutex_unlock(&tdx->lock);
> +
> +    vmcall->status_code = TDG_VP_VMCALL_SUCCESS;
> +}
> +
>  static void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu,
>                                                      struct kvm_tdx_vmcall *vmcall)
>  {
> @@ -1005,6 +1432,9 @@ static void tdx_handle_vmcall(X86CPU *cpu, struct kvm_tdx_vmcall *vmcall)
>      }
>  
>      switch (vmcall->subfunction) {
> +    case TDG_VP_VMCALL_GET_QUOTE:
> +        tdx_handle_get_quote(cpu, vmcall);
> +        break;
>      case TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT:
>          tdx_handle_setup_event_notify_interrupt(cpu, vmcall);
>          break;
> diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
> index 4a8d67cc9fdb..4a989805493e 100644
> --- a/target/i386/kvm/tdx.h
> +++ b/target/i386/kvm/tdx.h
> @@ -5,8 +5,10 @@
>  #include CONFIG_DEVICES /* CONFIG_TDX */
>  #endif
>  
> +#include <linux/kvm.h>
>  #include "exec/confidential-guest-support.h"
>  #include "hw/i386/tdvf.h"
> +#include "io/channel-socket.h"
>  #include "sysemu/kvm.h"
>  
>  #define TYPE_TDX_GUEST "tdx-guest"
> @@ -47,6 +49,10 @@ typedef struct TdxGuest {
>      /* runtime state */
>      int event_notify_interrupt;
>      uint32_t event_notify_apic_id;
> +
> +    /* GetQuote */
> +    int quote_generation_num;
> +    SocketAddress *quote_generation;
>  } TdxGuest;
>  
>  #ifdef CONFIG_TDX
> -- 
> 2.34.1
> 

With regards,
Daniel
Markus Armbruster Dec. 1, 2023, 11:02 a.m. UTC | #3
Xiaoyao Li <xiaoyao.li@intel.com> writes:

> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> For GetQuote, delegate a request to Quote Generation Service.
> Add property "quote-generation-socket" to tdx-guest, whihc is a property
> of type SocketAddress to specify Quote Generation Service(QGS).
>
> On request, connect to the QGS, read request buffer from shared guest
> memory, send the request buffer to the server and store the response
> into shared guest memory and notify TD guest by interrupt.
>
> command line example:
>   qemu-system-x86_64 \
>     -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"2","port":"1234"}}' \
>     -machine confidential-guest-support=tdx0
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Codeveloped-by: Chenyi Qiang <chenyi.qiang@intel.com>
> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
> Changes in v3:
> - rename property "quote-generation-service" to "quote-generation-socket";
> - change the type of "quote-generation-socket" from str to
>   SocketAddress;
> - squash next patch into this one;
> ---
>  qapi/qom.json         |   5 +-
>  target/i386/kvm/tdx.c | 430 ++++++++++++++++++++++++++++++++++++++++++
>  target/i386/kvm/tdx.h |   6 +
>  3 files changed, 440 insertions(+), 1 deletion(-)
>
> diff --git a/qapi/qom.json b/qapi/qom.json
> index fd99aa1ff8cc..cf36a1832ddd 100644
> --- a/qapi/qom.json
> +++ b/qapi/qom.json
> @@ -894,13 +894,16 @@
>  #
>  # @mrownerconfig: base64 MROWNERCONFIG SHA384 digest
>  #
> +# @quote-generation-socket: socket address for Quote Generation Service(QGS)
> +#

Long line.  Better:

   # @quote-generation-socket: socket address for Quote Generation
   #     Service(QGS)

>  # Since: 8.2
>  ##
>  { 'struct': 'TdxGuestProperties',
>    'data': { '*sept-ve-disable': 'bool',
>              '*mrconfigid': 'str',
>              '*mrowner': 'str',
> -            '*mrownerconfig': 'str' } }
> +            '*mrownerconfig': 'str',
> +            '*quote-generation-socket': 'SocketAddress' } }
>  
>  ##
>  # @ThreadContextProperties:
Xiaoyao Li Dec. 7, 2023, 7:38 a.m. UTC | #4
On 12/1/2023 7:02 PM, Markus Armbruster wrote:
> Xiaoyao Li <xiaoyao.li@intel.com> writes:
> 
>> From: Isaku Yamahata <isaku.yamahata@intel.com>
>>
>> For GetQuote, delegate a request to Quote Generation Service.
>> Add property "quote-generation-socket" to tdx-guest, whihc is a property
>> of type SocketAddress to specify Quote Generation Service(QGS).
>>
>> On request, connect to the QGS, read request buffer from shared guest
>> memory, send the request buffer to the server and store the response
>> into shared guest memory and notify TD guest by interrupt.
>>
>> command line example:
>>    qemu-system-x86_64 \
>>      -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"2","port":"1234"}}' \
>>      -machine confidential-guest-support=tdx0
>>
>> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
>> Codeveloped-by: Chenyi Qiang <chenyi.qiang@intel.com>
>> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>> ---
>> Changes in v3:
>> - rename property "quote-generation-service" to "quote-generation-socket";
>> - change the type of "quote-generation-socket" from str to
>>    SocketAddress;
>> - squash next patch into this one;
>> ---
>>   qapi/qom.json         |   5 +-
>>   target/i386/kvm/tdx.c | 430 ++++++++++++++++++++++++++++++++++++++++++
>>   target/i386/kvm/tdx.h |   6 +
>>   3 files changed, 440 insertions(+), 1 deletion(-)
>>
>> diff --git a/qapi/qom.json b/qapi/qom.json
>> index fd99aa1ff8cc..cf36a1832ddd 100644
>> --- a/qapi/qom.json
>> +++ b/qapi/qom.json
>> @@ -894,13 +894,16 @@
>>   #
>>   # @mrownerconfig: base64 MROWNERCONFIG SHA384 digest
>>   #
>> +# @quote-generation-socket: socket address for Quote Generation Service(QGS)
>> +#
> 
> Long line.  Better:
> 
>     # @quote-generation-socket: socket address for Quote Generation
>     #     Service(QGS)

May I ask what's the limitation for qom.json? if 80 columns limitation 
doesn't apply to it.

>>   # Since: 8.2
>>   ##
>>   { 'struct': 'TdxGuestProperties',
>>     'data': { '*sept-ve-disable': 'bool',
>>               '*mrconfigid': 'str',
>>               '*mrowner': 'str',
>> -            '*mrownerconfig': 'str' } }
>> +            '*mrownerconfig': 'str',
>> +            '*quote-generation-socket': 'SocketAddress' } }
>>   
>>   ##
>>   # @ThreadContextProperties:
>
Markus Armbruster Dec. 7, 2023, 9:20 a.m. UTC | #5
Xiaoyao Li <xiaoyao.li@intel.com> writes:

> On 12/1/2023 7:02 PM, Markus Armbruster wrote:
>> Xiaoyao Li <xiaoyao.li@intel.com> writes:
>> 
>>> From: Isaku Yamahata <isaku.yamahata@intel.com>
>>>
>>> For GetQuote, delegate a request to Quote Generation Service.
>>> Add property "quote-generation-socket" to tdx-guest, whihc is a property
>>> of type SocketAddress to specify Quote Generation Service(QGS).
>>>
>>> On request, connect to the QGS, read request buffer from shared guest
>>> memory, send the request buffer to the server and store the response
>>> into shared guest memory and notify TD guest by interrupt.
>>>
>>> command line example:
>>>    qemu-system-x86_64 \
>>>      -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"2","port":"1234"}}' \
>>>      -machine confidential-guest-support=tdx0
>>>
>>> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
>>> Codeveloped-by: Chenyi Qiang <chenyi.qiang@intel.com>
>>> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
>>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>> ---
>>> Changes in v3:
>>> - rename property "quote-generation-service" to "quote-generation-socket";
>>> - change the type of "quote-generation-socket" from str to
>>>    SocketAddress;
>>> - squash next patch into this one;
>>> ---
>>>   qapi/qom.json         |   5 +-
>>>   target/i386/kvm/tdx.c | 430 ++++++++++++++++++++++++++++++++++++++++++
>>>   target/i386/kvm/tdx.h |   6 +
>>>   3 files changed, 440 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/qapi/qom.json b/qapi/qom.json
>>> index fd99aa1ff8cc..cf36a1832ddd 100644
>>> --- a/qapi/qom.json
>>> +++ b/qapi/qom.json
>>> @@ -894,13 +894,16 @@
>>>   #
>>>   # @mrownerconfig: base64 MROWNERCONFIG SHA384 digest
>>>   #
>>> +# @quote-generation-socket: socket address for Quote Generation Service(QGS)
>>> +#
>> Long line.  Better:
>>     # @quote-generation-socket: socket address for Quote Generation
>>     #     Service(QGS)
>
> May I ask what's the limitation for qom.json? if 80 columns limitation doesn't apply to it.

docs/devel/qapi-code-gen.rst section "Documentation markup":

    For legibility, wrap text paragraphs so every line is at most 70
    characters long.

Why is this not 80?  Humans tend to have trouble following long lines
with their eyes (I sure do).  Typographic manuals suggest to limit
columns to roughly 60 characters for exactly that reason[*].

For code, four levels of indentation plus 60 characters of actual text
yields 76.  However, code lines can be awkward to break, and going over
80 can be less bad than an awkward line break.  Use your judgement.

Documentation text, however, tends to be indented much less: 6-10
characters of indentation plus 60 of actual text yields 66-70.  When I
reflowed the entire QAPI schema documentation to stay within that limit
(commit a937b6aa739), not a single line break was awkward.

>>>   # Since: 8.2
>>>   ##
>>>   { 'struct': 'TdxGuestProperties',
>>>     'data': { '*sept-ve-disable': 'bool',
>>>               '*mrconfigid': 'str',
>>>               '*mrowner': 'str',
>>> -            '*mrownerconfig': 'str' } }
>>> +            '*mrownerconfig': 'str',
>>> +            '*quote-generation-socket': 'SocketAddress' } }
>>>     ##
>>>   # @ThreadContextProperties:
>> 

[*] https://en.wikipedia.org/wiki/Column_(typography)#Typographic_style
Daniel P. Berrangé Dec. 21, 2023, 11:05 a.m. UTC | #6
On Wed, Nov 15, 2023 at 02:15:01AM -0500, Xiaoyao Li wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> For GetQuote, delegate a request to Quote Generation Service.
> Add property "quote-generation-socket" to tdx-guest, whihc is a property
> of type SocketAddress to specify Quote Generation Service(QGS).
> 
> On request, connect to the QGS, read request buffer from shared guest
> memory, send the request buffer to the server and store the response
> into shared guest memory and notify TD guest by interrupt.
> 
> command line example:
>   qemu-system-x86_64 \
>     -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"2","port":"1234"}}' \

Here you're illustrating a VSOCK address.  IIUC, both the 'qgs'
daemon and QEMU will be running in the host. Why would they need
to be using VSOCK, as opposed to a regular UNIX socket connection ?

>     -machine confidential-guest-support=tdx0
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Codeveloped-by: Chenyi Qiang <chenyi.qiang@intel.com>
> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
> Changes in v3:
> - rename property "quote-generation-service" to "quote-generation-socket";
> - change the type of "quote-generation-socket" from str to
>   SocketAddress;

With regards,
Daniel
Xiaoyao Li Dec. 22, 2023, 3:14 a.m. UTC | #7
On 12/21/2023 7:05 PM, Daniel P. Berrangé wrote:
> On Wed, Nov 15, 2023 at 02:15:01AM -0500, Xiaoyao Li wrote:
>> From: Isaku Yamahata <isaku.yamahata@intel.com>
>>
>> For GetQuote, delegate a request to Quote Generation Service.
>> Add property "quote-generation-socket" to tdx-guest, whihc is a property
>> of type SocketAddress to specify Quote Generation Service(QGS).
>>
>> On request, connect to the QGS, read request buffer from shared guest
>> memory, send the request buffer to the server and store the response
>> into shared guest memory and notify TD guest by interrupt.
>>
>> command line example:
>>    qemu-system-x86_64 \
>>      -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"2","port":"1234"}}' \
> 
> Here you're illustrating a VSOCK address.  IIUC, both the 'qgs'
> daemon and QEMU will be running in the host. Why would they need
> to be using VSOCK, as opposed to a regular UNIX socket connection ?
> 

We use vsock here because the QGS server we used for testing exposes the 
vsock socket.

I will add more examples in next version to show that any socket type is 
supported.
Daniel P. Berrangé Dec. 22, 2023, 1:14 p.m. UTC | #8
On Fri, Dec 22, 2023 at 11:14:12AM +0800, Xiaoyao Li wrote:
> On 12/21/2023 7:05 PM, Daniel P. Berrangé wrote:
> > On Wed, Nov 15, 2023 at 02:15:01AM -0500, Xiaoyao Li wrote:
> > > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > > 
> > > For GetQuote, delegate a request to Quote Generation Service.
> > > Add property "quote-generation-socket" to tdx-guest, whihc is a property
> > > of type SocketAddress to specify Quote Generation Service(QGS).
> > > 
> > > On request, connect to the QGS, read request buffer from shared guest
> > > memory, send the request buffer to the server and store the response
> > > into shared guest memory and notify TD guest by interrupt.
> > > 
> > > command line example:
> > >    qemu-system-x86_64 \
> > >      -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"2","port":"1234"}}' \
> > 
> > Here you're illustrating a VSOCK address.  IIUC, both the 'qgs'
> > daemon and QEMU will be running in the host. Why would they need
> > to be using VSOCK, as opposed to a regular UNIX socket connection ?
> > 
> 
> We use vsock here because the QGS server we used for testing exposes the
> vsock socket.

Is this is the server impl you test with:

  https://github.com/intel/SGXDataCenterAttestationPrimitives/tree/master/QuoteGeneration/quote_wrapper/qgs

or is there another impl ?

With regards,
Daniel
Xiaoyao Li Dec. 25, 2023, 12:34 p.m. UTC | #9
On 12/22/2023 9:14 PM, Daniel P. Berrangé wrote:
> On Fri, Dec 22, 2023 at 11:14:12AM +0800, Xiaoyao Li wrote:
>> On 12/21/2023 7:05 PM, Daniel P. Berrangé wrote:
>>> On Wed, Nov 15, 2023 at 02:15:01AM -0500, Xiaoyao Li wrote:
>>>> From: Isaku Yamahata <isaku.yamahata@intel.com>
>>>>
>>>> For GetQuote, delegate a request to Quote Generation Service.
>>>> Add property "quote-generation-socket" to tdx-guest, whihc is a property
>>>> of type SocketAddress to specify Quote Generation Service(QGS).
>>>>
>>>> On request, connect to the QGS, read request buffer from shared guest
>>>> memory, send the request buffer to the server and store the response
>>>> into shared guest memory and notify TD guest by interrupt.
>>>>
>>>> command line example:
>>>>     qemu-system-x86_64 \
>>>>       -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"2","port":"1234"}}' \
>>>
>>> Here you're illustrating a VSOCK address.  IIUC, both the 'qgs'
>>> daemon and QEMU will be running in the host. Why would they need
>>> to be using VSOCK, as opposed to a regular UNIX socket connection ?
>>>
>>
>> We use vsock here because the QGS server we used for testing exposes the
>> vsock socket.
> 
> Is this is the server impl you test with:
> 
>    https://github.com/intel/SGXDataCenterAttestationPrimitives/tree/master/QuoteGeneration/quote_wrapper/qgs

I think it should be.

I used applications/services bundled by internal teams.

> or is there another impl ?
> 
> With regards,
> Daniel
Xiaoyao Li Dec. 29, 2023, 2:30 a.m. UTC | #10
On 11/16/2023 1:58 AM, Daniel P. Berrangé wrote:
> On Wed, Nov 15, 2023 at 02:15:01AM -0500, Xiaoyao Li wrote:
>> From: Isaku Yamahata <isaku.yamahata@intel.com>
>>
>> For GetQuote, delegate a request to Quote Generation Service.
>> Add property "quote-generation-socket" to tdx-guest, whihc is a property
>> of type SocketAddress to specify Quote Generation Service(QGS).
>>
>> On request, connect to the QGS, read request buffer from shared guest
>> memory, send the request buffer to the server and store the response
>> into shared guest memory and notify TD guest by interrupt.
>>
>> command line example:
>>    qemu-system-x86_64 \
>>      -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"2","port":"1234"}}' \
>>      -machine confidential-guest-support=tdx0
>>
>> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
>> Codeveloped-by: Chenyi Qiang <chenyi.qiang@intel.com>
>> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>> ---
>> Changes in v3:
>> - rename property "quote-generation-service" to "quote-generation-socket";
>> - change the type of "quote-generation-socket" from str to
>>    SocketAddress;
>> - squash next patch into this one;
>> ---
>>   qapi/qom.json         |   5 +-
>>   target/i386/kvm/tdx.c | 430 ++++++++++++++++++++++++++++++++++++++++++
>>   target/i386/kvm/tdx.h |   6 +
>>   3 files changed, 440 insertions(+), 1 deletion(-)
>>
>> +static void tdx_handle_get_quote_connected(QIOTask *task, gpointer opaque)
>> +{
>> +    struct tdx_get_quote_task *t = opaque;
>> +    Error *err = NULL;
>> +    char *in_data = NULL;
>> +    MachineState *ms;
>> +    TdxGuest *tdx;
>> +
>> +    t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_ERROR);
>> +    if (qio_task_propagate_error(task, NULL)) {
>> +        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
>> +        goto error;
>> +    }
>> +
>> +    in_data = g_malloc(le32_to_cpu(t->hdr.in_len));
>> +    if (!in_data) {
>> +        goto error;
>> +    }
>> +
>> +    if (address_space_read(&address_space_memory, t->gpa + sizeof(t->hdr),
>> +                           MEMTXATTRS_UNSPECIFIED, in_data,
>> +                           le32_to_cpu(t->hdr.in_len)) != MEMTX_OK) {
>> +        goto error;
>> +    }
>> +
>> +    qio_channel_set_blocking(QIO_CHANNEL(t->ioc), false, NULL);
> 
> You've set the channel to non-blocking, but....
> 
>> +
>> +    if (qio_channel_write_all(QIO_CHANNEL(t->ioc), in_data,
>> +                              le32_to_cpu(t->hdr.in_len), &err) ||
>> +        err) {
> 
> ...this method will block execution of this thread, by either
> sleeping in poll() or doing a coroutine yield.
> 
> I don't think this is in coroutine context, so presumably this
> is just blocking.  So what was the point in marking the channel
> non-blocking ?

Hi Dainel,

First of all, I'm not good at socket or qio channel thing. Please 
correct me and teach me when I'm wrong.

I'm not the author of this patch. My understanding is that, set it to 
non-blocking is for the qio_channel_write_all() to proceed immediately?

If set non-blocking is not needed, I can remove it.

> You are setting up a background watch to wait for the reply
> so we don't block this thread, so you seem to want non-blocking
> behaviour.

Both sending and receiving are in a new thread created by 
qio_channel_socket_connect_async(). So I think both of then can be 
blocking and don't need to be in another background thread.

what's your suggestion on it? Make both sending and receiving blocking 
or non-blocking?

> Given this, you should not be using qio_channel_write_all()
> most likely. I think you need to be using qio_channel_add_watch
> to get notified when it is *writable*, to send 'in_data'
> incrementally & non-blocking. When that is finished then create
> another watch to wait for the reply.
> 
> 
>> +        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
>> +        goto error;
>> +    }
>> +
>> +    g_free(in_data);
>> +    qemu_set_fd_handler(t->ioc->fd, tdx_get_quote_read, NULL, t);
>> +
>> +    return;
>> +error:
>> +    t->hdr.out_len = cpu_to_le32(0);
>> +
>> +    if (address_space_write(
>> +            &address_space_memory, t->gpa,
>> +            MEMTXATTRS_UNSPECIFIED, &t->hdr, sizeof(t->hdr)) != MEMTX_OK) {
>> +        error_report("TDX: failed to update GetQuote header.\n");
>> +    }
>> +    tdx_td_notify(t);
>> +
>> +    qio_channel_close(QIO_CHANNEL(t->ioc), &err);
>> +    object_unref(OBJECT(t->ioc));
>> +    g_free(t);
>> +    g_free(in_data);
>> +
>> +    /* Maintain the number of in-flight requests. */
>> +    ms = MACHINE(qdev_get_machine());
>> +    tdx = TDX_GUEST(ms->cgs);
>> +    qemu_mutex_lock(&tdx->lock);
>> +    tdx->quote_generation_num--;
>> +    qemu_mutex_unlock(&tdx->lock);
>> +    return;
>> +}
>> +
>> +static void tdx_handle_get_quote(X86CPU *cpu, struct kvm_tdx_vmcall *vmcall)
>> +{
>> +    hwaddr gpa = vmcall->in_r12;
>> +    uint64_t buf_len = vmcall->in_r13;
>> +    struct tdx_get_quote_header hdr;
>> +    MachineState *ms;
>> +    TdxGuest *tdx;
>> +    QIOChannelSocket *ioc;
>> +    struct tdx_get_quote_task *t;
>> +
>> +    vmcall->status_code = TDG_VP_VMCALL_INVALID_OPERAND;
>> +
>> +    /* GPA must be shared. */
>> +    if (!(gpa & tdx_shared_bit(cpu))) {
>> +        return;
>> +    }
>> +    gpa &= ~tdx_shared_bit(cpu);
>> +
>> +    if (!QEMU_IS_ALIGNED(gpa, 4096) || !QEMU_IS_ALIGNED(buf_len, 4096)) {
>> +        vmcall->status_code = TDG_VP_VMCALL_ALIGN_ERROR;
>> +        return;
>> +    }
>> +    if (buf_len == 0) {
>> +        return;
>> +    }
>> +
>> +    if (address_space_read(&address_space_memory, gpa, MEMTXATTRS_UNSPECIFIED,
>> +                           &hdr, sizeof(hdr)) != MEMTX_OK) {
>> +        return;
>> +    }
>> +    if (le64_to_cpu(hdr.structure_version) != TDX_GET_QUOTE_STRUCTURE_VERSION) {
>> +        return;
>> +    }
>> +    /*
>> +     * Paranoid: Guest should clear error_code and out_len to avoid information
>> +     * leak.  Enforce it.  The initial value of them doesn't matter for qemu to
>> +     * process the request.
>> +     */
>> +    if (le64_to_cpu(hdr.error_code) != TDX_VP_GET_QUOTE_SUCCESS ||
>> +        le32_to_cpu(hdr.out_len) != 0) {
>> +        return;
>> +    }
>> +
>> +    /* Only safe-guard check to avoid too large buffer size. */
>> +    if (buf_len > TDX_GET_QUOTE_MAX_BUF_LEN ||
>> +        le32_to_cpu(hdr.in_len) > TDX_GET_QUOTE_MAX_BUF_LEN ||
>> +        le32_to_cpu(hdr.in_len) > buf_len) {
>> +        return;
>> +    }
>> +
>> +    /* Mark the buffer in-flight. */
>> +    hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_IN_FLIGHT);
>> +    if (address_space_write(&address_space_memory, gpa, MEMTXATTRS_UNSPECIFIED,
>> +                            &hdr, sizeof(hdr)) != MEMTX_OK) {
>> +        return;
>> +    }
>> +
>> +    ms = MACHINE(qdev_get_machine());
>> +    tdx = TDX_GUEST(ms->cgs);
>> +    ioc = qio_channel_socket_new();
>> +
>> +    t = g_malloc(sizeof(*t));
>> +    t->apic_id = tdx->event_notify_apic_id;
>> +    t->gpa = gpa;
>> +    t->buf_len = buf_len;
>> +    t->out_data = g_malloc(t->buf_len);
>> +    t->out_len = 0;
>> +    t->hdr = hdr;
>> +    t->ioc = ioc;
>> +
>> +    qemu_mutex_lock(&tdx->lock);
>> +    if (!tdx->quote_generation ||
>> +        /* Prevent too many in-flight get-quote request. */
>> +        tdx->quote_generation_num >= TDX_MAX_GET_QUOTE_REQUEST) {
>> +        qemu_mutex_unlock(&tdx->lock);
>> +        vmcall->status_code = TDG_VP_VMCALL_RETRY;
>> +        object_unref(OBJECT(ioc));
>> +        g_free(t->out_data);
>> +        g_free(t);
>> +        return;
>> +    }
>> +    tdx->quote_generation_num++;
>> +    t->event_notify_interrupt = tdx->event_notify_interrupt;
>> +    qio_channel_socket_connect_async(
>> +        ioc, tdx->quote_generation, tdx_handle_get_quote_connected, t, NULL,
>> +        NULL);
>> +    qemu_mutex_unlock(&tdx->lock);
>> +
>> +    vmcall->status_code = TDG_VP_VMCALL_SUCCESS;
>> +}
>> +
>>   static void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu,
>>                                                       struct kvm_tdx_vmcall *vmcall)
>>   {
>> @@ -1005,6 +1432,9 @@ static void tdx_handle_vmcall(X86CPU *cpu, struct kvm_tdx_vmcall *vmcall)
>>       }
>>   
>>       switch (vmcall->subfunction) {
>> +    case TDG_VP_VMCALL_GET_QUOTE:
>> +        tdx_handle_get_quote(cpu, vmcall);
>> +        break;
>>       case TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT:
>>           tdx_handle_setup_event_notify_interrupt(cpu, vmcall);
>>           break;
>> diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
>> index 4a8d67cc9fdb..4a989805493e 100644
>> --- a/target/i386/kvm/tdx.h
>> +++ b/target/i386/kvm/tdx.h
>> @@ -5,8 +5,10 @@
>>   #include CONFIG_DEVICES /* CONFIG_TDX */
>>   #endif
>>   
>> +#include <linux/kvm.h>
>>   #include "exec/confidential-guest-support.h"
>>   #include "hw/i386/tdvf.h"
>> +#include "io/channel-socket.h"
>>   #include "sysemu/kvm.h"
>>   
>>   #define TYPE_TDX_GUEST "tdx-guest"
>> @@ -47,6 +49,10 @@ typedef struct TdxGuest {
>>       /* runtime state */
>>       int event_notify_interrupt;
>>       uint32_t event_notify_apic_id;
>> +
>> +    /* GetQuote */
>> +    int quote_generation_num;
>> +    SocketAddress *quote_generation;
>>   } TdxGuest;
>>   
>>   #ifdef CONFIG_TDX
>> -- 
>> 2.34.1
>>
> 
> With regards,
> Daniel
Daniel P. Berrangé Jan. 8, 2024, 2:44 p.m. UTC | #11
On Fri, Dec 29, 2023 at 10:30:15AM +0800, Xiaoyao Li wrote:
> On 11/16/2023 1:58 AM, Daniel P. Berrangé wrote:
> > On Wed, Nov 15, 2023 at 02:15:01AM -0500, Xiaoyao Li wrote:
> > > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > > 
> > > For GetQuote, delegate a request to Quote Generation Service.
> > > Add property "quote-generation-socket" to tdx-guest, whihc is a property
> > > of type SocketAddress to specify Quote Generation Service(QGS).
> > > 
> > > On request, connect to the QGS, read request buffer from shared guest
> > > memory, send the request buffer to the server and store the response
> > > into shared guest memory and notify TD guest by interrupt.
> > > 
> > > command line example:
> > >    qemu-system-x86_64 \
> > >      -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"2","port":"1234"}}' \
> > >      -machine confidential-guest-support=tdx0
> > > 
> > > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > > Codeveloped-by: Chenyi Qiang <chenyi.qiang@intel.com>
> > > Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> > > Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> > > ---
> > > Changes in v3:
> > > - rename property "quote-generation-service" to "quote-generation-socket";
> > > - change the type of "quote-generation-socket" from str to
> > >    SocketAddress;
> > > - squash next patch into this one;
> > > ---
> > >   qapi/qom.json         |   5 +-
> > >   target/i386/kvm/tdx.c | 430 ++++++++++++++++++++++++++++++++++++++++++
> > >   target/i386/kvm/tdx.h |   6 +
> > >   3 files changed, 440 insertions(+), 1 deletion(-)
> > > 
> > > +static void tdx_handle_get_quote_connected(QIOTask *task, gpointer opaque)
> > > +{
> > > +    struct tdx_get_quote_task *t = opaque;
> > > +    Error *err = NULL;
> > > +    char *in_data = NULL;
> > > +    MachineState *ms;
> > > +    TdxGuest *tdx;
> > > +
> > > +    t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_ERROR);
> > > +    if (qio_task_propagate_error(task, NULL)) {
> > > +        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
> > > +        goto error;
> > > +    }
> > > +
> > > +    in_data = g_malloc(le32_to_cpu(t->hdr.in_len));
> > > +    if (!in_data) {
> > > +        goto error;
> > > +    }
> > > +
> > > +    if (address_space_read(&address_space_memory, t->gpa + sizeof(t->hdr),
> > > +                           MEMTXATTRS_UNSPECIFIED, in_data,
> > > +                           le32_to_cpu(t->hdr.in_len)) != MEMTX_OK) {
> > > +        goto error;
> > > +    }
> > > +
> > > +    qio_channel_set_blocking(QIO_CHANNEL(t->ioc), false, NULL);
> > 
> > You've set the channel to non-blocking, but....
> > 
> > > +
> > > +    if (qio_channel_write_all(QIO_CHANNEL(t->ioc), in_data,
> > > +                              le32_to_cpu(t->hdr.in_len), &err) ||
> > > +        err) {
> > 
> > ...this method will block execution of this thread, by either
> > sleeping in poll() or doing a coroutine yield.
> > 
> > I don't think this is in coroutine context, so presumably this
> > is just blocking.  So what was the point in marking the channel
> > non-blocking ?
> 
> Hi Dainel,
> 
> First of all, I'm not good at socket or qio channel thing. Please correct me
> and teach me when I'm wrong.
> 
> I'm not the author of this patch. My understanding is that, set it to
> non-blocking is for the qio_channel_write_all() to proceed immediately?

The '_all' suffixed methods are implemented such that they will
sleep in poll(), or a coroutine yield when seeing EAGAIN. 

> If set non-blocking is not needed, I can remove it.
> 
> > You are setting up a background watch to wait for the reply
> > so we don't block this thread, so you seem to want non-blocking
> > behaviour.
> 
> Both sending and receiving are in a new thread created by
> qio_channel_socket_connect_async(). So I think both of then can be blocking
> and don't need to be in another background thread.
> 
> what's your suggestion on it? Make both sending and receiving blocking or
> non-blocking?

I think the code /should/ be non-blocking, which would mean
using   qio_channel_write, instead of qio_channel_write_all,
and using a .

With regards,
Daniel
Xiaoyao Li Jan. 9, 2024, 5:38 a.m. UTC | #12
On 1/8/2024 10:44 PM, Daniel P. Berrangé wrote:
> On Fri, Dec 29, 2023 at 10:30:15AM +0800, Xiaoyao Li wrote:
>> On 11/16/2023 1:58 AM, Daniel P. Berrangé wrote:
>>> On Wed, Nov 15, 2023 at 02:15:01AM -0500, Xiaoyao Li wrote:
>>>> From: Isaku Yamahata <isaku.yamahata@intel.com>
>>>>
>>>> For GetQuote, delegate a request to Quote Generation Service.
>>>> Add property "quote-generation-socket" to tdx-guest, whihc is a property
>>>> of type SocketAddress to specify Quote Generation Service(QGS).
>>>>
>>>> On request, connect to the QGS, read request buffer from shared guest
>>>> memory, send the request buffer to the server and store the response
>>>> into shared guest memory and notify TD guest by interrupt.
>>>>
>>>> command line example:
>>>>     qemu-system-x86_64 \
>>>>       -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"2","port":"1234"}}' \
>>>>       -machine confidential-guest-support=tdx0
>>>>
>>>> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
>>>> Codeveloped-by: Chenyi Qiang <chenyi.qiang@intel.com>
>>>> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
>>>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>>> ---
>>>> Changes in v3:
>>>> - rename property "quote-generation-service" to "quote-generation-socket";
>>>> - change the type of "quote-generation-socket" from str to
>>>>     SocketAddress;
>>>> - squash next patch into this one;
>>>> ---
>>>>    qapi/qom.json         |   5 +-
>>>>    target/i386/kvm/tdx.c | 430 ++++++++++++++++++++++++++++++++++++++++++
>>>>    target/i386/kvm/tdx.h |   6 +
>>>>    3 files changed, 440 insertions(+), 1 deletion(-)
>>>>
>>>> +static void tdx_handle_get_quote_connected(QIOTask *task, gpointer opaque)
>>>> +{
>>>> +    struct tdx_get_quote_task *t = opaque;
>>>> +    Error *err = NULL;
>>>> +    char *in_data = NULL;
>>>> +    MachineState *ms;
>>>> +    TdxGuest *tdx;
>>>> +
>>>> +    t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_ERROR);
>>>> +    if (qio_task_propagate_error(task, NULL)) {
>>>> +        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
>>>> +        goto error;
>>>> +    }
>>>> +
>>>> +    in_data = g_malloc(le32_to_cpu(t->hdr.in_len));
>>>> +    if (!in_data) {
>>>> +        goto error;
>>>> +    }
>>>> +
>>>> +    if (address_space_read(&address_space_memory, t->gpa + sizeof(t->hdr),
>>>> +                           MEMTXATTRS_UNSPECIFIED, in_data,
>>>> +                           le32_to_cpu(t->hdr.in_len)) != MEMTX_OK) {
>>>> +        goto error;
>>>> +    }
>>>> +
>>>> +    qio_channel_set_blocking(QIO_CHANNEL(t->ioc), false, NULL);
>>>
>>> You've set the channel to non-blocking, but....
>>>
>>>> +
>>>> +    if (qio_channel_write_all(QIO_CHANNEL(t->ioc), in_data,
>>>> +                              le32_to_cpu(t->hdr.in_len), &err) ||
>>>> +        err) {
>>>
>>> ...this method will block execution of this thread, by either
>>> sleeping in poll() or doing a coroutine yield.
>>>
>>> I don't think this is in coroutine context, so presumably this
>>> is just blocking.  So what was the point in marking the channel
>>> non-blocking ?
>>
>> Hi Dainel,
>>
>> First of all, I'm not good at socket or qio channel thing. Please correct me
>> and teach me when I'm wrong.
>>
>> I'm not the author of this patch. My understanding is that, set it to
>> non-blocking is for the qio_channel_write_all() to proceed immediately?
> 
> The '_all' suffixed methods are implemented such that they will
> sleep in poll(), or a coroutine yield when seeing EAGAIN.
> 
>> If set non-blocking is not needed, I can remove it.
>>
>>> You are setting up a background watch to wait for the reply
>>> so we don't block this thread, so you seem to want non-blocking
>>> behaviour.
>>
>> Both sending and receiving are in a new thread created by
>> qio_channel_socket_connect_async(). So I think both of then can be blocking
>> and don't need to be in another background thread.
>>
>> what's your suggestion on it? Make both sending and receiving blocking or
>> non-blocking?
> 
> I think the code /should/ be non-blocking, which would mean
> using   qio_channel_write, instead of qio_channel_write_all,
> and using a .

I see. will implement in the next version.

> With regards,
> Daniel
diff mbox series

Patch

diff --git a/qapi/qom.json b/qapi/qom.json
index fd99aa1ff8cc..cf36a1832ddd 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -894,13 +894,16 @@ 
 #
 # @mrownerconfig: base64 MROWNERCONFIG SHA384 digest
 #
+# @quote-generation-socket: socket address for Quote Generation Service(QGS)
+#
 # Since: 8.2
 ##
 { 'struct': 'TdxGuestProperties',
   'data': { '*sept-ve-disable': 'bool',
             '*mrconfigid': 'str',
             '*mrowner': 'str',
-            '*mrownerconfig': 'str' } }
+            '*mrownerconfig': 'str',
+            '*quote-generation-socket': 'SocketAddress' } }
 
 ##
 # @ThreadContextProperties:
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 5fc5d857fb6f..54b38c031fb3 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -16,6 +16,7 @@ 
 #include "qemu/base64.h"
 #include "qemu/mmap-alloc.h"
 #include "qapi/error.h"
+#include "qapi/qapi-visit-sockets.h"
 #include "qom/object_interfaces.h"
 #include "standard-headers/asm-x86/kvm_para.h"
 #include "sysemu/kvm.h"
@@ -23,6 +24,8 @@ 
 #include "exec/address-spaces.h"
 #include "exec/ramblock.h"
 
+#include "exec/address-spaces.h"
+#include "hw/i386/apic_internal.h"
 #include "hw/i386/e820_memory_layout.h"
 #include "hw/i386/x86.h"
 #include "hw/i386/tdvf.h"
@@ -923,6 +926,29 @@  static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **
     tdx->mrconfigid = g_strdup(value);
 }
 
+static void tdx_guest_get_quote_generation(Object *obj, Visitor *v,
+                                            const char *name, void *opaque,
+                                            Error **errp)
+{
+    TdxGuest *tdx = TDX_GUEST(obj);
+
+    visit_type_SocketAddress(v, name, &tdx->quote_generation, errp);
+}
+
+static void tdx_guest_set_quote_generation(Object *obj, Visitor *v,
+                                           const char *name, void *opaque,
+                                           Error **errp)
+{
+    TdxGuest *tdx = TDX_GUEST(obj);
+    SocketAddress *sock = NULL;
+
+    if (!visit_type_SocketAddress(v, name, &sock, errp)) {
+        return;
+    }
+
+    tdx->quote_generation = sock;
+}
+
 /* tdx guest */
 OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
                                    tdx_guest,
@@ -957,6 +983,12 @@  static void tdx_guest_init(Object *obj)
                             tdx_guest_get_mrownerconfig,
                             tdx_guest_set_mrownerconfig);
 
+    tdx->quote_generation = NULL;
+    object_property_add(obj, "quote-generation-socket", "SocketAddress",
+                            tdx_guest_get_quote_generation,
+                            tdx_guest_set_quote_generation,
+                            NULL, NULL);
+
     tdx->event_notify_interrupt = -1;
     tdx->event_notify_apic_id = -1;
 }
@@ -969,6 +1001,7 @@  static void tdx_guest_class_init(ObjectClass *oc, void *data)
 {
 }
 
+#define TDG_VP_VMCALL_GET_QUOTE                         0x10002ULL
 #define TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT      0x10004ULL
 
 #define TDG_VP_VMCALL_SUCCESS           0x0000000000000000ULL
@@ -977,6 +1010,400 @@  static void tdx_guest_class_init(ObjectClass *oc, void *data)
 #define TDG_VP_VMCALL_GPA_INUSE         0x8000000000000001ULL
 #define TDG_VP_VMCALL_ALIGN_ERROR       0x8000000000000002ULL
 
+#define TDX_GET_QUOTE_STRUCTURE_VERSION 1ULL
+
+#define TDX_VP_GET_QUOTE_SUCCESS                0ULL
+#define TDX_VP_GET_QUOTE_IN_FLIGHT              (-1ULL)
+#define TDX_VP_GET_QUOTE_ERROR                  0x8000000000000000ULL
+#define TDX_VP_GET_QUOTE_QGS_UNAVAILABLE        0x8000000000000001ULL
+
+/* Limit to avoid resource starvation. */
+#define TDX_GET_QUOTE_MAX_BUF_LEN       (128 * 1024)
+#define TDX_MAX_GET_QUOTE_REQUEST       16
+
+/* Format of pages shared with guest. */
+struct tdx_get_quote_header {
+    /* Format version: must be 1 in little endian. */
+    uint64_t structure_version;
+
+    /*
+     * GetQuote status code in little endian:
+     *   Guest must set error_code to 0 to avoid information leak.
+     *   Qemu sets this before interrupting guest.
+     */
+    uint64_t error_code;
+
+    /*
+     * in-message size in little endian: The message will follow this header.
+     * The in-message will be send to QGS.
+     */
+    uint32_t in_len;
+
+    /*
+     * out-message size in little endian:
+     * On request, out_len must be zero to avoid information leak.
+     * On return, message size from QGS. Qemu overwrites this field.
+     * The message will follows this header.  The in-message is overwritten.
+     */
+    uint32_t out_len;
+
+    /*
+     * Message buffer follows.
+     * Guest sets message that will be send to QGS.  If out_len > in_len, guest
+     * should zero remaining buffer to avoid information leak.
+     * Qemu overwrites this buffer with a message returned from QGS.
+     */
+};
+
+static hwaddr tdx_shared_bit(X86CPU *cpu)
+{
+    return (cpu->phys_bits > 48) ? BIT_ULL(51) : BIT_ULL(47);
+}
+
+struct tdx_get_quote_task {
+    uint32_t apic_id;
+    hwaddr gpa;
+    uint64_t buf_len;
+    char *out_data;
+    uint64_t out_len;
+    struct tdx_get_quote_header hdr;
+    int event_notify_interrupt;
+    QIOChannelSocket *ioc;
+};
+
+struct x86_msi {
+    union {
+        struct {
+            uint32_t    reserved_0              : 2,
+                        dest_mode_logical       : 1,
+                        redirect_hint           : 1,
+                        reserved_1              : 1,
+                        virt_destid_8_14        : 7,
+                        destid_0_7              : 8,
+                        base_address            : 12;
+        } QEMU_PACKED x86_address_lo;
+        uint32_t address_lo;
+    };
+    union {
+        struct {
+            uint32_t    reserved        : 8,
+                        destid_8_31     : 24;
+        } QEMU_PACKED x86_address_hi;
+        uint32_t address_hi;
+    };
+    union {
+        struct {
+            uint32_t    vector                  : 8,
+                        delivery_mode           : 3,
+                        dest_mode_logical       : 1,
+                        reserved                : 2,
+                        active_low              : 1,
+                        is_level                : 1;
+        } QEMU_PACKED x86_data;
+        uint32_t data;
+    };
+};
+
+static void tdx_td_notify(struct tdx_get_quote_task *t)
+{
+    struct x86_msi x86_msi;
+    struct kvm_msi msi;
+    int ret;
+
+    /* It is optional for host VMM to interrupt TD. */
+    if(!(32 <= t->event_notify_interrupt && t->event_notify_interrupt <= 255))
+        return;
+
+    x86_msi = (struct x86_msi) {
+        .x86_address_lo  = {
+            .reserved_0 = 0,
+            .dest_mode_logical = 0,
+            .redirect_hint = 0,
+            .reserved_1 = 0,
+            .virt_destid_8_14 = 0,
+            .destid_0_7 = t->apic_id & 0xff,
+        },
+        .x86_address_hi = {
+            .reserved = 0,
+            .destid_8_31 = t->apic_id >> 8,
+        },
+        .x86_data = {
+            .vector = t->event_notify_interrupt,
+            .delivery_mode = APIC_DM_FIXED,
+            .dest_mode_logical = 0,
+            .reserved = 0,
+            .active_low = 0,
+            .is_level = 0,
+        },
+    };
+    msi = (struct kvm_msi) {
+        .address_lo = x86_msi.address_lo,
+        .address_hi = x86_msi.address_hi,
+        .data = x86_msi.data,
+        .flags = 0,
+        .devid = 0,
+    };
+    ret = kvm_vm_ioctl(kvm_state, KVM_SIGNAL_MSI, &msi);
+    if (ret < 0) {
+        /* In this case, no better way to tell it to guest.  Log it. */
+        error_report("TDX: injection %d failed, interrupt lost (%s).\n",
+                     t->event_notify_interrupt, strerror(-ret));
+    }
+}
+
+static void tdx_get_quote_read(void *opaque)
+{
+    struct tdx_get_quote_task *t = opaque;
+    ssize_t size = 0;
+    Error *err = NULL;
+    MachineState *ms;
+    TdxGuest *tdx;
+
+    while (true) {
+        char *buf;
+        size_t buf_size;
+
+        if (t->out_len < t->buf_len) {
+            buf = t->out_data + t->out_len;
+            buf_size = t->buf_len - t->out_len;
+        } else {
+            /*
+             * The received data is too large to fit in the shared GPA.
+             * Discard the received data and try to know the data size.
+             */
+            buf = t->out_data;
+            buf_size = t->buf_len;
+        }
+
+        size = qio_channel_read(QIO_CHANNEL(t->ioc), buf, buf_size, &err);
+        if (!size) {
+            break;
+        }
+
+        if (size < 0) {
+            if (size == QIO_CHANNEL_ERR_BLOCK) {
+                return;
+            } else {
+                break;
+            }
+        }
+        t->out_len += size;
+    }
+    /*
+     * If partial read successfully but return error at last, also treat it
+     * as failure.
+     */
+    if (size < 0) {
+        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
+        goto error;
+    }
+    if (t->out_len > 0 && t->out_len > t->buf_len) {
+        /*
+         * There is no specific error code defined for this case(E2BIG) at the
+         * moment.
+         * TODO: Once an error code for this case is defined in GHCI spec ,
+         * update the error code.
+         */
+        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_ERROR);
+        t->hdr.out_len = cpu_to_le32(t->out_len);
+        goto error_hdr;
+    }
+
+    if (address_space_write(
+            &address_space_memory, t->gpa + sizeof(t->hdr),
+            MEMTXATTRS_UNSPECIFIED, t->out_data, t->out_len) != MEMTX_OK) {
+        goto error;
+    }
+    /*
+     * Even if out_len == 0, it's a success.  It's up to the QGS-client contract
+     * how to interpret the zero-sized message as return message.
+     */
+    t->hdr.out_len = cpu_to_le32(t->out_len);
+    t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_SUCCESS);
+
+error:
+    if (t->hdr.error_code != cpu_to_le64(TDX_VP_GET_QUOTE_SUCCESS)) {
+        t->hdr.out_len = cpu_to_le32(0);
+    }
+error_hdr:
+    if (address_space_write(
+            &address_space_memory, t->gpa,
+            MEMTXATTRS_UNSPECIFIED, &t->hdr, sizeof(t->hdr)) != MEMTX_OK) {
+        error_report("TDX: failed to update GetQuote header.");
+    }
+    tdx_td_notify(t);
+
+    qemu_set_fd_handler(t->ioc->fd, NULL, NULL, NULL);
+    qio_channel_close(QIO_CHANNEL(t->ioc), &err);
+    object_unref(OBJECT(t->ioc));
+    g_free(t->out_data);
+    g_free(t);
+
+    /* Maintain the number of in-flight requests. */
+    ms = MACHINE(qdev_get_machine());
+    tdx = TDX_GUEST(ms->cgs);
+    qemu_mutex_lock(&tdx->lock);
+    tdx->quote_generation_num--;
+    qemu_mutex_unlock(&tdx->lock);
+}
+
+/*
+ * TODO: If QGS doesn't reply for long time, make it an error and interrupt
+ * guest.
+ */
+static void tdx_handle_get_quote_connected(QIOTask *task, gpointer opaque)
+{
+    struct tdx_get_quote_task *t = opaque;
+    Error *err = NULL;
+    char *in_data = NULL;
+    MachineState *ms;
+    TdxGuest *tdx;
+
+    t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_ERROR);
+    if (qio_task_propagate_error(task, NULL)) {
+        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
+        goto error;
+    }
+
+    in_data = g_malloc(le32_to_cpu(t->hdr.in_len));
+    if (!in_data) {
+        goto error;
+    }
+
+    if (address_space_read(&address_space_memory, t->gpa + sizeof(t->hdr),
+                           MEMTXATTRS_UNSPECIFIED, in_data,
+                           le32_to_cpu(t->hdr.in_len)) != MEMTX_OK) {
+        goto error;
+    }
+
+    qio_channel_set_blocking(QIO_CHANNEL(t->ioc), false, NULL);
+
+    if (qio_channel_write_all(QIO_CHANNEL(t->ioc), in_data,
+                              le32_to_cpu(t->hdr.in_len), &err) ||
+        err) {
+        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
+        goto error;
+    }
+
+    g_free(in_data);
+    qemu_set_fd_handler(t->ioc->fd, tdx_get_quote_read, NULL, t);
+
+    return;
+error:
+    t->hdr.out_len = cpu_to_le32(0);
+
+    if (address_space_write(
+            &address_space_memory, t->gpa,
+            MEMTXATTRS_UNSPECIFIED, &t->hdr, sizeof(t->hdr)) != MEMTX_OK) {
+        error_report("TDX: failed to update GetQuote header.\n");
+    }
+    tdx_td_notify(t);
+
+    qio_channel_close(QIO_CHANNEL(t->ioc), &err);
+    object_unref(OBJECT(t->ioc));
+    g_free(t);
+    g_free(in_data);
+
+    /* Maintain the number of in-flight requests. */
+    ms = MACHINE(qdev_get_machine());
+    tdx = TDX_GUEST(ms->cgs);
+    qemu_mutex_lock(&tdx->lock);
+    tdx->quote_generation_num--;
+    qemu_mutex_unlock(&tdx->lock);
+    return;
+}
+
+static void tdx_handle_get_quote(X86CPU *cpu, struct kvm_tdx_vmcall *vmcall)
+{
+    hwaddr gpa = vmcall->in_r12;
+    uint64_t buf_len = vmcall->in_r13;
+    struct tdx_get_quote_header hdr;
+    MachineState *ms;
+    TdxGuest *tdx;
+    QIOChannelSocket *ioc;
+    struct tdx_get_quote_task *t;
+
+    vmcall->status_code = TDG_VP_VMCALL_INVALID_OPERAND;
+
+    /* GPA must be shared. */
+    if (!(gpa & tdx_shared_bit(cpu))) {
+        return;
+    }
+    gpa &= ~tdx_shared_bit(cpu);
+
+    if (!QEMU_IS_ALIGNED(gpa, 4096) || !QEMU_IS_ALIGNED(buf_len, 4096)) {
+        vmcall->status_code = TDG_VP_VMCALL_ALIGN_ERROR;
+        return;
+    }
+    if (buf_len == 0) {
+        return;
+    }
+
+    if (address_space_read(&address_space_memory, gpa, MEMTXATTRS_UNSPECIFIED,
+                           &hdr, sizeof(hdr)) != MEMTX_OK) {
+        return;
+    }
+    if (le64_to_cpu(hdr.structure_version) != TDX_GET_QUOTE_STRUCTURE_VERSION) {
+        return;
+    }
+    /*
+     * Paranoid: Guest should clear error_code and out_len to avoid information
+     * leak.  Enforce it.  The initial value of them doesn't matter for qemu to
+     * process the request.
+     */
+    if (le64_to_cpu(hdr.error_code) != TDX_VP_GET_QUOTE_SUCCESS ||
+        le32_to_cpu(hdr.out_len) != 0) {
+        return;
+    }
+
+    /* Only safe-guard check to avoid too large buffer size. */
+    if (buf_len > TDX_GET_QUOTE_MAX_BUF_LEN ||
+        le32_to_cpu(hdr.in_len) > TDX_GET_QUOTE_MAX_BUF_LEN ||
+        le32_to_cpu(hdr.in_len) > buf_len) {
+        return;
+    }
+
+    /* Mark the buffer in-flight. */
+    hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_IN_FLIGHT);
+    if (address_space_write(&address_space_memory, gpa, MEMTXATTRS_UNSPECIFIED,
+                            &hdr, sizeof(hdr)) != MEMTX_OK) {
+        return;
+    }
+
+    ms = MACHINE(qdev_get_machine());
+    tdx = TDX_GUEST(ms->cgs);
+    ioc = qio_channel_socket_new();
+
+    t = g_malloc(sizeof(*t));
+    t->apic_id = tdx->event_notify_apic_id;
+    t->gpa = gpa;
+    t->buf_len = buf_len;
+    t->out_data = g_malloc(t->buf_len);
+    t->out_len = 0;
+    t->hdr = hdr;
+    t->ioc = ioc;
+
+    qemu_mutex_lock(&tdx->lock);
+    if (!tdx->quote_generation ||
+        /* Prevent too many in-flight get-quote request. */
+        tdx->quote_generation_num >= TDX_MAX_GET_QUOTE_REQUEST) {
+        qemu_mutex_unlock(&tdx->lock);
+        vmcall->status_code = TDG_VP_VMCALL_RETRY;
+        object_unref(OBJECT(ioc));
+        g_free(t->out_data);
+        g_free(t);
+        return;
+    }
+    tdx->quote_generation_num++;
+    t->event_notify_interrupt = tdx->event_notify_interrupt;
+    qio_channel_socket_connect_async(
+        ioc, tdx->quote_generation, tdx_handle_get_quote_connected, t, NULL,
+        NULL);
+    qemu_mutex_unlock(&tdx->lock);
+
+    vmcall->status_code = TDG_VP_VMCALL_SUCCESS;
+}
+
 static void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu,
                                                     struct kvm_tdx_vmcall *vmcall)
 {
@@ -1005,6 +1432,9 @@  static void tdx_handle_vmcall(X86CPU *cpu, struct kvm_tdx_vmcall *vmcall)
     }
 
     switch (vmcall->subfunction) {
+    case TDG_VP_VMCALL_GET_QUOTE:
+        tdx_handle_get_quote(cpu, vmcall);
+        break;
     case TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT:
         tdx_handle_setup_event_notify_interrupt(cpu, vmcall);
         break;
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index 4a8d67cc9fdb..4a989805493e 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -5,8 +5,10 @@ 
 #include CONFIG_DEVICES /* CONFIG_TDX */
 #endif
 
+#include <linux/kvm.h>
 #include "exec/confidential-guest-support.h"
 #include "hw/i386/tdvf.h"
+#include "io/channel-socket.h"
 #include "sysemu/kvm.h"
 
 #define TYPE_TDX_GUEST "tdx-guest"
@@ -47,6 +49,10 @@  typedef struct TdxGuest {
     /* runtime state */
     int event_notify_interrupt;
     uint32_t event_notify_apic_id;
+
+    /* GetQuote */
+    int quote_generation_num;
+    SocketAddress *quote_generation;
 } TdxGuest;
 
 #ifdef CONFIG_TDX