diff mbox series

[v21,03/20] target/s390x/cpu topology: handle STSI(15) and build the SYSIB

Message ID 20230630091752.67190-4-pmorel@linux.ibm.com
State New
Headers show
Series s390x: CPU Topology | expand

Commit Message

Pierre Morel June 30, 2023, 9:17 a.m. UTC
On interception of STSI(15.1.x) the System Information Block
(SYSIB) is built from the list of pre-ordered topology entries.

Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
---
 MAINTAINERS                      |   1 +
 qapi/machine-target.json         |  14 ++
 include/hw/s390x/cpu-topology.h  |  25 +++
 include/hw/s390x/sclp.h          |   1 +
 target/s390x/cpu.h               |  76 ++++++++
 hw/s390x/cpu-topology.c          |   4 +-
 target/s390x/kvm/kvm.c           |   5 +-
 target/s390x/kvm/stsi-topology.c | 310 +++++++++++++++++++++++++++++++
 target/s390x/kvm/meson.build     |   3 +-
 9 files changed, 436 insertions(+), 3 deletions(-)
 create mode 100644 target/s390x/kvm/stsi-topology.c

Comments

Thomas Huth July 4, 2023, 11:40 a.m. UTC | #1
On 30/06/2023 11.17, Pierre Morel wrote:
> On interception of STSI(15.1.x) the System Information Block
> (SYSIB) is built from the list of pre-ordered topology entries.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> ---
...
> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
> index 7ebd5e05b6..6e7d041b01 100644
> --- a/target/s390x/cpu.h
> +++ b/target/s390x/cpu.h
> @@ -569,6 +569,29 @@ typedef struct SysIB_322 {
>   } SysIB_322;
>   QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
>   
> +/*
> + * Topology Magnitude fields (MAG) indicates the maximum number of
> + * topology list entries (TLE) at the corresponding nesting level.
> + */
> +#define S390_TOPOLOGY_MAG  6
> +#define S390_TOPOLOGY_MAG6 0
> +#define S390_TOPOLOGY_MAG5 1
> +#define S390_TOPOLOGY_MAG4 2
> +#define S390_TOPOLOGY_MAG3 3
> +#define S390_TOPOLOGY_MAG2 4
> +#define S390_TOPOLOGY_MAG1 5
> +/* Configuration topology */
> +typedef struct SysIB_151x {
> +    uint8_t  reserved0[2];
> +    uint16_t length;
> +    uint8_t  mag[S390_TOPOLOGY_MAG];
> +    uint8_t  reserved1;
> +    uint8_t  mnest;
> +    uint32_t reserved2;
> +    char tle[];
> +} SysIB_151x;
> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
> +
>   typedef union SysIB {
>       SysIB_111 sysib_111;
>       SysIB_121 sysib_121;
> @@ -576,9 +599,62 @@ typedef union SysIB {
>       SysIB_221 sysib_221;
>       SysIB_222 sysib_222;
>       SysIB_322 sysib_322;
> +    SysIB_151x sysib_151x;
>   } SysIB;
>   QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>   
> +/*
> + * CPU Topology List provided by STSI with fc=15 provides a list
> + * of two different Topology List Entries (TLE) types to specify
> + * the topology hierarchy.
> + *
> + * - Container Topology List Entry
> + *   Defines a container to contain other Topology List Entries
> + *   of any type, nested containers or CPU.
> + * - CPU Topology List Entry
> + *   Specifies the CPUs position, type, entitlement and polarization
> + *   of the CPUs contained in the last Container TLE.
> + *
> + * There can be theoretically up to five levels of containers, QEMU
> + * uses only three levels, the drawer's, book's and socket's level.
> + *
> + * A container with a nesting level (NL) greater than 1 can only
> + * contain another container of nesting level NL-1.
> + *
> + * A container of nesting level 1 (socket), contains as many CPU TLE
> + * as needed to describe the position and qualities of all CPUs inside
> + * the container.
> + * The qualities of a CPU are polarization, entitlement and type.
> + *
> + * The CPU TLE defines the position of the CPUs of identical qualities
> + * using a 64bits mask which first bit has its offset defined by
> + * the CPU address orgin field of the CPU TLE like in:
> + * CPU address = origin * 64 + bit position within the mask
> + *
> + */
> +/* Container type Topology List Entry */
> +typedef struct SysIBTl_container {
> +        uint8_t nl;
> +        uint8_t reserved[6];
> +        uint8_t id;
> +} SysIBTl_container;

Why mixing CamelCase with underscore-style here? SysIBTlContainer would look 
more natural, I think?

> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
> +
> +/* CPU type Topology List Entry */
> +typedef struct SysIBTl_cpu {
> +        uint8_t nl;
> +        uint8_t reserved0[3];
> +#define SYSIB_TLE_POLARITY_MASK 0x03
> +#define SYSIB_TLE_DEDICATED     0x04
> +        uint8_t flags;
> +        uint8_t type;
> +        uint16_t origin;
> +        uint64_t mask;
> +} SysIBTl_cpu;

dito, maybe better SysIBTlCpu ?

> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
> +
> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, uint8_t ar);
> +
>   /* MMU defines */
>   #define ASCE_ORIGIN           (~0xfffULL) /* segment table origin             */
>   #define ASCE_SUBSPACE         0x200       /* subspace group control           */
> diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
> index b163c17f8f..ca1634d0ce 100644
> --- a/hw/s390x/cpu-topology.c
> +++ b/hw/s390x/cpu-topology.c
> @@ -29,11 +29,13 @@
>    * .cores_per_socket: tracks information on the count of cores
>    *                    per socket.
>    * .smp: keeps track of the machine topology.
> - *
> + * .list: queue the topology entries inside which
> + *        we keep the information on the CPU topology.

The comment does not match the code below.

>    */
>   S390Topology s390_topology = {
>       /* will be initialized after the cpu model is realized */
>       .cores_per_socket = NULL,
> +    .polarization = S390_CPU_POLARIZATION_HORIZONTAL,
>   };
>   
>   /**
> diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
> index 3ac7ec9acf..5ea358cbb0 100644
> --- a/target/s390x/kvm/kvm.c
> +++ b/target/s390x/kvm/kvm.c
> @@ -1919,9 +1919,12 @@ static int handle_stsi(S390CPU *cpu)
>           if (run->s390_stsi.sel1 != 2 || run->s390_stsi.sel2 != 2) {
>               return 0;
>           }
> -        /* Only sysib 3.2.2 needs post-handling for now. */
>           insert_stsi_3_2_2(cpu, run->s390_stsi.addr, run->s390_stsi.ar);
>           return 0;
> +    case 15:
> +        insert_stsi_15_1_x(cpu, run->s390_stsi.sel2, run->s390_stsi.addr,
> +                           run->s390_stsi.ar);
> +        return 0;
>       default:
>           return 0;
>       }
> diff --git a/target/s390x/kvm/stsi-topology.c b/target/s390x/kvm/stsi-topology.c
> new file mode 100644
> index 0000000000..0b789450da
> --- /dev/null
> +++ b/target/s390x/kvm/stsi-topology.c
> @@ -0,0 +1,310 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/*
> + * QEMU S390x CPU Topology
> + *
> + * Copyright IBM Corp. 2022,2023
> + * Author(s): Pierre Morel <pmorel@linux.ibm.com>
> + *
> + */
> +#include "qemu/osdep.h"
> +#include "cpu.h"
> +#include "hw/s390x/pv.h"
> +#include "hw/sysbus.h"
> +#include "hw/s390x/sclp.h"
> +#include "hw/s390x/cpu-topology.h"
> +
> +/**
> + * fill_container:
> + * @p: The address of the container TLE to fill
> + * @level: The level of nesting for this container
> + * @id: The container receives a unique ID inside its own container
> + *
> + * Returns the next free TLE entry.
> + */
> +static char *fill_container(char *p, int level, int id)
> +{
> +    SysIBTl_container *tle = (SysIBTl_container *)p;
> +
> +    tle->nl = level;
> +    tle->id = id;
> +    return p + sizeof(*tle);
> +}
> +
> +/**
> + * fill_tle_cpu:
> + * @p: The address of the CPU TLE to fill
> + * @entry: a pointer to the S390TopologyEntry defining this
> + *         CPU container.
> + *
> + * Returns the next free TLE entry.
> + */
> +static char *fill_tle_cpu(char *p, S390TopologyEntry *entry)
> +{
> +    SysIBTl_cpu *tle = (SysIBTl_cpu *)p;
> +    s390_topology_id topology_id = entry->id;
> +
> +    tle->nl = 0;
> +    if (topology_id.dedicated) {
> +        tle->flags = SYSIB_TLE_DEDICATED;
> +    }
> +    tle->flags |= topology_id.entitlement;

If it was not dedicated, the "|=" might operate with a random value here. 
Better pre-initialize flags with 0, or change the order of the operations here:

     tle->flags = topology_id.entitlement;
     if (topology_id.dedicated) {
         tle->flags |= SYSIB_TLE_DEDICATED;
     }

> +    tle->type = topology_id.type;
> +    tle->origin = cpu_to_be16(topology_id.origin * 64);
> +    tle->mask = cpu_to_be64(entry->mask);
> +    return p + sizeof(*tle);
> +}
> +
> +/*
> + * Macro to check that the size of data after increment
> + * will not get bigger than the size of the SysIB.
> + */
> +#define SYSIB_GUARD(data, x) do {       \
> +        data += x;                      \
> +        if (data > sizeof(SysIB)) {    \
> +            return 0;                   \
> +        }                               \
> +    } while (0)
> +
> +/**
> + * stsi_topology_fill_sysib:
> + * @p: A pointer to the position of the first TLE
> + * @level: The nested level wanted by the guest
> + *
> + * Fill the SYSIB with the topology information as described in
> + * the PoP, nesting containers as appropriate, with the maximum
> + * nesting limited by @level.
> + *
> + * Return value:
> + * On success: the size of the SysIB_15x after being filled with TLE.
> + * On error: 0 in the case we would overrun the end of the SysIB.
> + */
> +static int stsi_topology_fill_sysib(S390TopologyList *topology_list,
> +                                    char *p, int level)
> +{
> +    S390TopologyEntry *entry;
> +    int last_drawer = -1;
> +    int last_book = -1;
> +    int last_socket = -1;
> +    int drawer_id = 0;
> +    int book_id = 0;
> +    int socket_id = 0;
> +    int n = sizeof(SysIB_151x);
> +
> +    QTAILQ_FOREACH(entry, topology_list, next) {
> +        bool drawer_change = last_drawer != entry->id.drawer;
> +        bool book_change = drawer_change || last_book != entry->id.book;
> +        bool socket_change = book_change || last_socket != entry->id.socket;
> +
> +        /* If we reach the sentinel get out */
> +        if (entry->id.sentinel) {
> +            break;
> +        }
> +
> +        if (level > 3 && drawer_change) {
> +            SYSIB_GUARD(n, sizeof(SysIBTl_container));
> +            p = fill_container(p, 3, drawer_id++);
> +            book_id = 0;
> +        }
> +        if (level > 2 && book_change) {
> +            SYSIB_GUARD(n, sizeof(SysIBTl_container));
> +            p = fill_container(p, 2, book_id++);
> +            socket_id = 0;
> +        }
> +        if (socket_change) {
> +            SYSIB_GUARD(n, sizeof(SysIBTl_container));
> +            p = fill_container(p, 1, socket_id++);
> +        }
> +
> +        SYSIB_GUARD(n, sizeof(SysIBTl_cpu));
> +        p = fill_tle_cpu(p, entry);
> +        last_drawer = entry->id.drawer;
> +        last_book = entry->id.book;
> +        last_socket = entry->id.socket;
> +    }
> +
> +    return n;
> +}
> +
> +/**
> + * setup_stsi:
> + * sysib: pointer to a SysIB to be filled with SysIB_151x data
> + * level: Nested level specified by the guest
> + *
> + * Setup the SYSIB for STSI 15.1, the header as well as the description
> + * of the topology.
> + */
> +static int setup_stsi(S390TopologyList *topology_list, SysIB_151x *sysib,
> +                      int level)
> +{
> +    sysib->mnest = level;
> +    switch (level) {
> +    case 4:
> +        sysib->mag[S390_TOPOLOGY_MAG4] = current_machine->smp.drawers;
> +        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.books;
> +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
> +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
> +        break;
> +    case 3:
> +        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.drawers *
> +                                         current_machine->smp.books;
> +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
> +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
> +        break;
> +    case 2:
> +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.drawers *
> +                                         current_machine->smp.books *
> +                                         current_machine->smp.sockets;
> +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
> +        break;
> +    }
> +
> +    return stsi_topology_fill_sysib(topology_list, sysib->tle, level);
> +}
> +
> +/**
> + * s390_topology_add_cpu_to_entry:
> + * @entry: Topology entry to setup
> + * @cpu: the S390CPU to add
> + *
> + * Set the core bit inside the topology mask and
> + * increments the number of cores for the socket.

The second part of the sentence does not seem to be done here anymore?

> + */
> +static void s390_topology_add_cpu_to_entry(S390TopologyEntry *entry,
> +                                           S390CPU *cpu)
> +{
> +    set_bit(63 - (cpu->env.core_id % 64), &entry->mask);
> +}
> +
> +/**
> + * s390_topology_from_cpu:
> + * @cpu: The S390CPU
> + *
> + * Initialize the topology id from the CPU environment.
> + */
> +static s390_topology_id s390_topology_from_cpu(S390CPU *cpu)
> +{
> +    s390_topology_id topology_id = {0};
> +
> +    topology_id.drawer = cpu->env.drawer_id;
> +    topology_id.book = cpu->env.book_id;
> +    topology_id.socket = cpu->env.socket_id;
> +    topology_id.origin = cpu->env.core_id / 64;
> +    topology_id.type = S390_TOPOLOGY_CPU_IFL;
> +    topology_id.dedicated = cpu->env.dedicated;
> +
> +    if (s390_topology.polarization == S390_CPU_POLARIZATION_VERTICAL) {
> +        topology_id.entitlement = cpu->env.entitlement;
> +    }
> +
> +    return topology_id;
> +}
> +
> +/**
> + * s390_topology_insert:
> + * @cpu: s390CPU insert.

"s390CPU insert" sounds like a weird description for @cpu

> + *
> + * Parse the topology list to find if the entry already
> + * exist and add the core in it.
> + * If it does not exist, allocate a new entry and insert
> + * it in the queue from lower id to greater id.
> + */
> +static void s390_topology_insert(S390TopologyList *topology_list, S390CPU *cpu)
> +{
> +    s390_topology_id id = s390_topology_from_cpu(cpu);
> +    S390TopologyEntry *entry = NULL;
> +    S390TopologyEntry *tmp = NULL;
> +
> +    QTAILQ_FOREACH(tmp, topology_list, next) {
> +        if (id.id == tmp->id.id) {
> +            s390_topology_add_cpu_to_entry(tmp, cpu);
> +            return;
> +        } else if (id.id < tmp->id.id) {
> +            entry = g_malloc0(sizeof(S390TopologyEntry));
> +            entry->id.id = id.id;
> +            s390_topology_add_cpu_to_entry(entry, cpu);
> +            QTAILQ_INSERT_BEFORE(tmp, entry, next);
> +            return;
> +        }
> +    }
> +}
> +
> +/**
> + * s390_topology_fill_list_sorted:
> + *
> + * Loop over all CPU and insert it at the right place
> + * inside the TLE entry list.
> + * Fill the S390Topology list with entries according to the order
> + * specified by the PoP.
> + */
> +static void s390_topology_fill_list_sorted(S390TopologyList *topology_list)
> +{
> +    CPUState *cs;
> +
> +    CPU_FOREACH(cs) {
> +        s390_topology_insert(topology_list, S390_CPU(cs));
> +    }
> +}
> +
> +/**
> + * s390_topology_empty_list:
> + *
> + * Clear all entries in the S390Topology list except the sentinel.
> + */
> +static void s390_topology_empty_list(S390TopologyList *topology_list)
> +{
> +    S390TopologyEntry *entry = NULL;
> +    S390TopologyEntry *tmp = NULL;
> +
> +    QTAILQ_FOREACH_SAFE(entry, topology_list, next, tmp) {
> +        QTAILQ_REMOVE(topology_list, entry, next);
> +        g_free(entry);
> +    }
> +}
> +
> +/**
> + * insert_stsi_15_1_x:
> + * cpu: the CPU doing the call for which we set CC
> + * sel2: the selector 2, containing the nested level
> + * addr: Guest logical address of the guest SysIB
> + * ar: the access register number
> + *
> + * Create a list head for the Topology entries and initialize it.

s/Topology/topology/

> + * Insert the first entry as a sentinelle.

s/sentinelle/sentinel/

> + * Emulate STSI 15.1.x, that is, perform all necessary checks and
> + * fill the SYSIB.
> + * In case the topology description is too long to fit into the SYSIB,
> + * set CC=3 and abort without writing the SYSIB.
> + */
> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, uint8_t ar)
> +{
> +    S390TopologyList topology_list;
> +    S390TopologyEntry *entry;
> +    SysIB sysib = {0};
> +    int length;
> +
> +    if (!s390_has_topology() || sel2 < 2 || sel2 > SCLP_READ_SCP_INFO_MNEST) {
> +        setcc(cpu, 3);
> +        return;
> +    }
> +
> +    QTAILQ_INIT(&topology_list);
> +    entry = g_malloc0(sizeof(S390TopologyEntry));
> +    entry->id.sentinel = 0xff;
> +    QTAILQ_INSERT_HEAD(&topology_list, entry, next);
> +
> +    s390_topology_fill_list_sorted(&topology_list);
> +
> +    length = setup_stsi(&topology_list, &sysib.sysib_151x, sel2);
> +
> +    if (!length) {
> +        setcc(cpu, 3);
> +        return;
> +    }
> +
> +    sysib.sysib_151x.length = cpu_to_be16(length);
> +    s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, length);
> +    setcc(cpu, 0);

s390_cpu_virt_mem_write() could fail, I think in that case you should not 
change the CC value here?

Also, what about protected virtualization? Do you have to use 
s390_cpu_pv_mem_write() in case PV is enabled?

> +
> +    s390_topology_empty_list(&topology_list);
> +}
> diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build
> index 37253f75bf..bcf014ba87 100644
> --- a/target/s390x/kvm/meson.build
> +++ b/target/s390x/kvm/meson.build
> @@ -1,6 +1,7 @@
>   
>   s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
> -  'kvm.c'
> +  'kvm.c',
> +  'stsi-topology.c'
>   ), if_false: files(
>     'stubs.c'
>   ))

  Thomas
Thomas Huth July 4, 2023, 12:27 p.m. UTC | #2
On 04/07/2023 13.40, Thomas Huth wrote:
...
> Also, what about protected virtualization? Do you have to use 
> s390_cpu_pv_mem_write() in case PV is enabled?

Never mind, I keep forgetting that CPU topology can't be used together with 
PV (I just noticed after reading patch 07/20).
Not sure ... but maybe a comment here in the code would help?

  Thomas
Pierre Morel July 12, 2023, 2:24 p.m. UTC | #3
On 7/4/23 13:40, Thomas Huth wrote:
> On 30/06/2023 11.17, Pierre Morel wrote:
>> On interception of STSI(15.1.x) the System Information Block
>> (SYSIB) is built from the list of pre-ordered topology entries.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>> ---
> ...
>> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
>> index 7ebd5e05b6..6e7d041b01 100644
>> --- a/target/s390x/cpu.h
>> +++ b/target/s390x/cpu.h
>> @@ -569,6 +569,29 @@ typedef struct SysIB_322 {
>>   } SysIB_322;
>>   QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
>>   +/*
>> + * Topology Magnitude fields (MAG) indicates the maximum number of
>> + * topology list entries (TLE) at the corresponding nesting level.
>> + */
>> +#define S390_TOPOLOGY_MAG  6
>> +#define S390_TOPOLOGY_MAG6 0
>> +#define S390_TOPOLOGY_MAG5 1
>> +#define S390_TOPOLOGY_MAG4 2
>> +#define S390_TOPOLOGY_MAG3 3
>> +#define S390_TOPOLOGY_MAG2 4
>> +#define S390_TOPOLOGY_MAG1 5
>> +/* Configuration topology */
>> +typedef struct SysIB_151x {
>> +    uint8_t  reserved0[2];
>> +    uint16_t length;
>> +    uint8_t  mag[S390_TOPOLOGY_MAG];
>> +    uint8_t  reserved1;
>> +    uint8_t  mnest;
>> +    uint32_t reserved2;
>> +    char tle[];
>> +} SysIB_151x;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
>> +
>>   typedef union SysIB {
>>       SysIB_111 sysib_111;
>>       SysIB_121 sysib_121;
>> @@ -576,9 +599,62 @@ typedef union SysIB {
>>       SysIB_221 sysib_221;
>>       SysIB_222 sysib_222;
>>       SysIB_322 sysib_322;
>> +    SysIB_151x sysib_151x;
>>   } SysIB;
>>   QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>>   +/*
>> + * CPU Topology List provided by STSI with fc=15 provides a list
>> + * of two different Topology List Entries (TLE) types to specify
>> + * the topology hierarchy.
>> + *
>> + * - Container Topology List Entry
>> + *   Defines a container to contain other Topology List Entries
>> + *   of any type, nested containers or CPU.
>> + * - CPU Topology List Entry
>> + *   Specifies the CPUs position, type, entitlement and polarization
>> + *   of the CPUs contained in the last Container TLE.
>> + *
>> + * There can be theoretically up to five levels of containers, QEMU
>> + * uses only three levels, the drawer's, book's and socket's level.
>> + *
>> + * A container with a nesting level (NL) greater than 1 can only
>> + * contain another container of nesting level NL-1.
>> + *
>> + * A container of nesting level 1 (socket), contains as many CPU TLE
>> + * as needed to describe the position and qualities of all CPUs inside
>> + * the container.
>> + * The qualities of a CPU are polarization, entitlement and type.
>> + *
>> + * The CPU TLE defines the position of the CPUs of identical qualities
>> + * using a 64bits mask which first bit has its offset defined by
>> + * the CPU address orgin field of the CPU TLE like in:
>> + * CPU address = origin * 64 + bit position within the mask
>> + *
>> + */
>> +/* Container type Topology List Entry */
>> +typedef struct SysIBTl_container {
>> +        uint8_t nl;
>> +        uint8_t reserved[6];
>> +        uint8_t id;
>> +} SysIBTl_container;
>
> Why mixing CamelCase with underscore-style here? SysIBTlContainer 
> would look more natural, I think?


OK, what about SYSIBContainerListEntry ?


>
>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
>> +
>> +/* CPU type Topology List Entry */
>> +typedef struct SysIBTl_cpu {
>> +        uint8_t nl;
>> +        uint8_t reserved0[3];
>> +#define SYSIB_TLE_POLARITY_MASK 0x03
>> +#define SYSIB_TLE_DEDICATED     0x04
>> +        uint8_t flags;
>> +        uint8_t type;
>> +        uint16_t origin;
>> +        uint64_t mask;
>> +} SysIBTl_cpu;
>
> dito, maybe better SysIBTlCpu ?


What about SysIBCPUListEntry ?


>
>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
>> +
>> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, 
>> uint8_t ar);
>> +
>>   /* MMU defines */
>>   #define ASCE_ORIGIN           (~0xfffULL) /* segment table 
>> origin             */
>>   #define ASCE_SUBSPACE         0x200       /* subspace group 
>> control           */
>> diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
>> index b163c17f8f..ca1634d0ce 100644
>> --- a/hw/s390x/cpu-topology.c
>> +++ b/hw/s390x/cpu-topology.c
>> @@ -29,11 +29,13 @@
>>    * .cores_per_socket: tracks information on the count of cores
>>    *                    per socket.
>>    * .smp: keeps track of the machine topology.
>> - *
>> + * .list: queue the topology entries inside which
>> + *        we keep the information on the CPU topology.
>
> The comment does not match the code below.


a left over I modify this. Thanks.


>
>>    */
>>   S390Topology s390_topology = {
>>       /* will be initialized after the cpu model is realized */
>>       .cores_per_socket = NULL,
>> +    .polarization = S390_CPU_POLARIZATION_HORIZONTAL,
>>   };
>>     /**
>> diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
>> index 3ac7ec9acf..5ea358cbb0 100644
>> --- a/target/s390x/kvm/kvm.c
>> +++ b/target/s390x/kvm/kvm.c
>> @@ -1919,9 +1919,12 @@ static int handle_stsi(S390CPU *cpu)
>>           if (run->s390_stsi.sel1 != 2 || run->s390_stsi.sel2 != 2) {
>>               return 0;
>>           }
>> -        /* Only sysib 3.2.2 needs post-handling for now. */
>>           insert_stsi_3_2_2(cpu, run->s390_stsi.addr, 
>> run->s390_stsi.ar);
>>           return 0;
>> +    case 15:
>> +        insert_stsi_15_1_x(cpu, run->s390_stsi.sel2, 
>> run->s390_stsi.addr,
>> +                           run->s390_stsi.ar);
>> +        return 0;
>>       default:
>>           return 0;
>>       }
>> diff --git a/target/s390x/kvm/stsi-topology.c 
>> b/target/s390x/kvm/stsi-topology.c
>> new file mode 100644
>> index 0000000000..0b789450da
>> --- /dev/null
>> +++ b/target/s390x/kvm/stsi-topology.c
>> @@ -0,0 +1,310 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +/*
>> + * QEMU S390x CPU Topology
>> + *
>> + * Copyright IBM Corp. 2022,2023
>> + * Author(s): Pierre Morel <pmorel@linux.ibm.com>
>> + *
>> + */
>> +#include "qemu/osdep.h"
>> +#include "cpu.h"
>> +#include "hw/s390x/pv.h"
>> +#include "hw/sysbus.h"
>> +#include "hw/s390x/sclp.h"
>> +#include "hw/s390x/cpu-topology.h"
>> +
>> +/**
>> + * fill_container:
>> + * @p: The address of the container TLE to fill
>> + * @level: The level of nesting for this container
>> + * @id: The container receives a unique ID inside its own container
>> + *
>> + * Returns the next free TLE entry.
>> + */
>> +static char *fill_container(char *p, int level, int id)
>> +{
>> +    SysIBTl_container *tle = (SysIBTl_container *)p;
>> +
>> +    tle->nl = level;
>> +    tle->id = id;
>> +    return p + sizeof(*tle);
>> +}
>> +
>> +/**
>> + * fill_tle_cpu:
>> + * @p: The address of the CPU TLE to fill
>> + * @entry: a pointer to the S390TopologyEntry defining this
>> + *         CPU container.
>> + *
>> + * Returns the next free TLE entry.
>> + */
>> +static char *fill_tle_cpu(char *p, S390TopologyEntry *entry)
>> +{
>> +    SysIBTl_cpu *tle = (SysIBTl_cpu *)p;
>> +    s390_topology_id topology_id = entry->id;
>> +
>> +    tle->nl = 0;
>> +    if (topology_id.dedicated) {
>> +        tle->flags = SYSIB_TLE_DEDICATED;
>> +    }
>> +    tle->flags |= topology_id.entitlement;
>
> If it was not dedicated, the "|=" might operate with a random value 
> here. Better pre-initialize flags with 0, or change the order of the 
> operations here:
>
>     tle->flags = topology_id.entitlement;
>     if (topology_id.dedicated) {
>         tle->flags |= SYSIB_TLE_DEDICATED;
>     }


oh, yes.


>
>> +    tle->type = topology_id.type;
>> +    tle->origin = cpu_to_be16(topology_id.origin * 64);
>> +    tle->mask = cpu_to_be64(entry->mask);
>> +    return p + sizeof(*tle);
>> +}
>> +
>> +/*
>> + * Macro to check that the size of data after increment
>> + * will not get bigger than the size of the SysIB.
>> + */
>> +#define SYSIB_GUARD(data, x) do {       \
>> +        data += x;                      \
>> +        if (data > sizeof(SysIB)) {    \
>> +            return 0;                   \
>> +        }                               \
>> +    } while (0)
>> +
>> +/**
>> + * stsi_topology_fill_sysib:
>> + * @p: A pointer to the position of the first TLE
>> + * @level: The nested level wanted by the guest
>> + *
>> + * Fill the SYSIB with the topology information as described in
>> + * the PoP, nesting containers as appropriate, with the maximum
>> + * nesting limited by @level.
>> + *
>> + * Return value:
>> + * On success: the size of the SysIB_15x after being filled with TLE.
>> + * On error: 0 in the case we would overrun the end of the SysIB.
>> + */
>> +static int stsi_topology_fill_sysib(S390TopologyList *topology_list,
>> +                                    char *p, int level)
>> +{
>> +    S390TopologyEntry *entry;
>> +    int last_drawer = -1;
>> +    int last_book = -1;
>> +    int last_socket = -1;
>> +    int drawer_id = 0;
>> +    int book_id = 0;
>> +    int socket_id = 0;
>> +    int n = sizeof(SysIB_151x);
>> +
>> +    QTAILQ_FOREACH(entry, topology_list, next) {
>> +        bool drawer_change = last_drawer != entry->id.drawer;
>> +        bool book_change = drawer_change || last_book != 
>> entry->id.book;
>> +        bool socket_change = book_change || last_socket != 
>> entry->id.socket;
>> +
>> +        /* If we reach the sentinel get out */
>> +        if (entry->id.sentinel) {
>> +            break;
>> +        }
>> +
>> +        if (level > 3 && drawer_change) {
>> +            SYSIB_GUARD(n, sizeof(SysIBTl_container));
>> +            p = fill_container(p, 3, drawer_id++);
>> +            book_id = 0;
>> +        }
>> +        if (level > 2 && book_change) {
>> +            SYSIB_GUARD(n, sizeof(SysIBTl_container));
>> +            p = fill_container(p, 2, book_id++);
>> +            socket_id = 0;
>> +        }
>> +        if (socket_change) {
>> +            SYSIB_GUARD(n, sizeof(SysIBTl_container));
>> +            p = fill_container(p, 1, socket_id++);
>> +        }
>> +
>> +        SYSIB_GUARD(n, sizeof(SysIBTl_cpu));
>> +        p = fill_tle_cpu(p, entry);
>> +        last_drawer = entry->id.drawer;
>> +        last_book = entry->id.book;
>> +        last_socket = entry->id.socket;
>> +    }
>> +
>> +    return n;
>> +}
>> +
>> +/**
>> + * setup_stsi:
>> + * sysib: pointer to a SysIB to be filled with SysIB_151x data
>> + * level: Nested level specified by the guest
>> + *
>> + * Setup the SYSIB for STSI 15.1, the header as well as the description
>> + * of the topology.
>> + */
>> +static int setup_stsi(S390TopologyList *topology_list, SysIB_151x 
>> *sysib,
>> +                      int level)
>> +{
>> +    sysib->mnest = level;
>> +    switch (level) {
>> +    case 4:
>> +        sysib->mag[S390_TOPOLOGY_MAG4] = current_machine->smp.drawers;
>> +        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.books;
>> +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
>> +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
>> +        break;
>> +    case 3:
>> +        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.drawers *
>> + current_machine->smp.books;
>> +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
>> +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
>> +        break;
>> +    case 2:
>> +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.drawers *
>> + current_machine->smp.books *
>> + current_machine->smp.sockets;
>> +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
>> +        break;
>> +    }
>> +
>> +    return stsi_topology_fill_sysib(topology_list, sysib->tle, level);
>> +}
>> +
>> +/**
>> + * s390_topology_add_cpu_to_entry:
>> + * @entry: Topology entry to setup
>> + * @cpu: the S390CPU to add
>> + *
>> + * Set the core bit inside the topology mask and
>> + * increments the number of cores for the socket.
>
> The second part of the sentence does not seem to be done here anymore?


Right, it is not needed here anymore since this is doe during the STSI 
interception.

It was needed during the setup of the topology or during changes, 
hotplug or moving a CPU,

which is done in hw/s390/cpu.topology.c

>
>> + */
>> +static void s390_topology_add_cpu_to_entry(S390TopologyEntry *entry,
>> +                                           S390CPU *cpu)
>> +{
>> +    set_bit(63 - (cpu->env.core_id % 64), &entry->mask);
>> +}
>> +
>> +/**
>> + * s390_topology_from_cpu:
>> + * @cpu: The S390CPU
>> + *
>> + * Initialize the topology id from the CPU environment.
>> + */
>> +static s390_topology_id s390_topology_from_cpu(S390CPU *cpu)
>> +{
>> +    s390_topology_id topology_id = {0};
>> +
>> +    topology_id.drawer = cpu->env.drawer_id;
>> +    topology_id.book = cpu->env.book_id;
>> +    topology_id.socket = cpu->env.socket_id;
>> +    topology_id.origin = cpu->env.core_id / 64;
>> +    topology_id.type = S390_TOPOLOGY_CPU_IFL;
>> +    topology_id.dedicated = cpu->env.dedicated;
>> +
>> +    if (s390_topology.polarization == S390_CPU_POLARIZATION_VERTICAL) {
>> +        topology_id.entitlement = cpu->env.entitlement;
>> +    }
>> +
>> +    return topology_id;
>> +}
>> +
>> +/**
>> + * s390_topology_insert:
>> + * @cpu: s390CPU insert.
>
> "s390CPU insert" sounds like a weird description for @cpu


yes I think "S390CPU to be inserted in the topology list" is better


>
>> + *
>> + * Parse the topology list to find if the entry already
>> + * exist and add the core in it.
>> + * If it does not exist, allocate a new entry and insert
>> + * it in the queue from lower id to greater id.
>> + */
>> +static void s390_topology_insert(S390TopologyList *topology_list, 
>> S390CPU *cpu)
>> +{
>> +    s390_topology_id id = s390_topology_from_cpu(cpu);
>> +    S390TopologyEntry *entry = NULL;
>> +    S390TopologyEntry *tmp = NULL;
>> +
>> +    QTAILQ_FOREACH(tmp, topology_list, next) {
>> +        if (id.id == tmp->id.id) {
>> +            s390_topology_add_cpu_to_entry(tmp, cpu);
>> +            return;
>> +        } else if (id.id < tmp->id.id) {
>> +            entry = g_malloc0(sizeof(S390TopologyEntry));
>> +            entry->id.id = id.id;
>> +            s390_topology_add_cpu_to_entry(entry, cpu);
>> +            QTAILQ_INSERT_BEFORE(tmp, entry, next);
>> +            return;
>> +        }
>> +    }
>> +}
>> +
>> +/**
>> + * s390_topology_fill_list_sorted:
>> + *
>> + * Loop over all CPU and insert it at the right place
>> + * inside the TLE entry list.
>> + * Fill the S390Topology list with entries according to the order
>> + * specified by the PoP.
>> + */
>> +static void s390_topology_fill_list_sorted(S390TopologyList 
>> *topology_list)
>> +{
>> +    CPUState *cs;
>> +
>> +    CPU_FOREACH(cs) {
>> +        s390_topology_insert(topology_list, S390_CPU(cs));
>> +    }
>> +}
>> +
>> +/**
>> + * s390_topology_empty_list:
>> + *
>> + * Clear all entries in the S390Topology list except the sentinel.
>> + */
>> +static void s390_topology_empty_list(S390TopologyList *topology_list)
>> +{
>> +    S390TopologyEntry *entry = NULL;
>> +    S390TopologyEntry *tmp = NULL;
>> +
>> +    QTAILQ_FOREACH_SAFE(entry, topology_list, next, tmp) {
>> +        QTAILQ_REMOVE(topology_list, entry, next);
>> +        g_free(entry);
>> +    }
>> +}
>> +
>> +/**
>> + * insert_stsi_15_1_x:
>> + * cpu: the CPU doing the call for which we set CC
>> + * sel2: the selector 2, containing the nested level
>> + * addr: Guest logical address of the guest SysIB
>> + * ar: the access register number
>> + *
>> + * Create a list head for the Topology entries and initialize it.
>
> s/Topology/topology/
OK
>
>> + * Insert the first entry as a sentinelle.
>
> s/sentinelle/sentinel/

thx


>
>> + * Emulate STSI 15.1.x, that is, perform all necessary checks and
>> + * fill the SYSIB.
>> + * In case the topology description is too long to fit into the SYSIB,
>> + * set CC=3 and abort without writing the SYSIB.
>> + */
>> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, 
>> uint8_t ar)
>> +{
>> +    S390TopologyList topology_list;
>> +    S390TopologyEntry *entry;
>> +    SysIB sysib = {0};
>> +    int length;
>> +
>> +    if (!s390_has_topology() || sel2 < 2 || sel2 > 
>> SCLP_READ_SCP_INFO_MNEST) {
>> +        setcc(cpu, 3);
>> +        return;
>> +    }
>> +
>> +    QTAILQ_INIT(&topology_list);
>> +    entry = g_malloc0(sizeof(S390TopologyEntry));
>> +    entry->id.sentinel = 0xff;
>> +    QTAILQ_INSERT_HEAD(&topology_list, entry, next);
>> +
>> +    s390_topology_fill_list_sorted(&topology_list);
>> +
>> +    length = setup_stsi(&topology_list, &sysib.sysib_151x, sel2);
>> +
>> +    if (!length) {
>> +        setcc(cpu, 3);


I forgot to free the buffers here... will be added like


     if (!length) {
         s390_topology_empty_list(&topology_list);
         setcc(cpu, 3);
         return;
     }


>> +        return;
>> +    }
>> +
>> +    sysib.sysib_151x.length = cpu_to_be16(length);
>> +    s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, length);
>> +    setcc(cpu, 0);
>
> s390_cpu_virt_mem_write() could fail, I think in that case you should 
> not change the CC value here?


right, thanks.

Regards,

Pierre
Thomas Huth July 12, 2023, 8:14 p.m. UTC | #4
On 12/07/2023 16.24, Pierre Morel wrote:
> 
> On 7/4/23 13:40, Thomas Huth wrote:
>> On 30/06/2023 11.17, Pierre Morel wrote:
>>> On interception of STSI(15.1.x) the System Information Block
>>> (SYSIB) is built from the list of pre-ordered topology entries.
>>>
>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>> ---
>> ...
>>> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
>>> index 7ebd5e05b6..6e7d041b01 100644
>>> --- a/target/s390x/cpu.h
>>> +++ b/target/s390x/cpu.h
>>> @@ -569,6 +569,29 @@ typedef struct SysIB_322 {
>>>   } SysIB_322;
>>>   QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
>>>   +/*
>>> + * Topology Magnitude fields (MAG) indicates the maximum number of
>>> + * topology list entries (TLE) at the corresponding nesting level.
>>> + */
>>> +#define S390_TOPOLOGY_MAG  6
>>> +#define S390_TOPOLOGY_MAG6 0
>>> +#define S390_TOPOLOGY_MAG5 1
>>> +#define S390_TOPOLOGY_MAG4 2
>>> +#define S390_TOPOLOGY_MAG3 3
>>> +#define S390_TOPOLOGY_MAG2 4
>>> +#define S390_TOPOLOGY_MAG1 5
>>> +/* Configuration topology */
>>> +typedef struct SysIB_151x {
>>> +    uint8_t  reserved0[2];
>>> +    uint16_t length;
>>> +    uint8_t  mag[S390_TOPOLOGY_MAG];
>>> +    uint8_t  reserved1;
>>> +    uint8_t  mnest;
>>> +    uint32_t reserved2;
>>> +    char tle[];
>>> +} SysIB_151x;
>>> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
>>> +
>>>   typedef union SysIB {
>>>       SysIB_111 sysib_111;
>>>       SysIB_121 sysib_121;
>>> @@ -576,9 +599,62 @@ typedef union SysIB {
>>>       SysIB_221 sysib_221;
>>>       SysIB_222 sysib_222;
>>>       SysIB_322 sysib_322;
>>> +    SysIB_151x sysib_151x;
>>>   } SysIB;
>>>   QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>>>   +/*
>>> + * CPU Topology List provided by STSI with fc=15 provides a list
>>> + * of two different Topology List Entries (TLE) types to specify
>>> + * the topology hierarchy.
>>> + *
>>> + * - Container Topology List Entry
>>> + *   Defines a container to contain other Topology List Entries
>>> + *   of any type, nested containers or CPU.
>>> + * - CPU Topology List Entry
>>> + *   Specifies the CPUs position, type, entitlement and polarization
>>> + *   of the CPUs contained in the last Container TLE.
>>> + *
>>> + * There can be theoretically up to five levels of containers, QEMU
>>> + * uses only three levels, the drawer's, book's and socket's level.
>>> + *
>>> + * A container with a nesting level (NL) greater than 1 can only
>>> + * contain another container of nesting level NL-1.
>>> + *
>>> + * A container of nesting level 1 (socket), contains as many CPU TLE
>>> + * as needed to describe the position and qualities of all CPUs inside
>>> + * the container.
>>> + * The qualities of a CPU are polarization, entitlement and type.
>>> + *
>>> + * The CPU TLE defines the position of the CPUs of identical qualities
>>> + * using a 64bits mask which first bit has its offset defined by
>>> + * the CPU address orgin field of the CPU TLE like in:
>>> + * CPU address = origin * 64 + bit position within the mask
>>> + *
>>> + */
>>> +/* Container type Topology List Entry */
>>> +typedef struct SysIBTl_container {
>>> +        uint8_t nl;
>>> +        uint8_t reserved[6];
>>> +        uint8_t id;
>>> +} SysIBTl_container;
>>
>> Why mixing CamelCase with underscore-style here? SysIBTlContainer would 
>> look more natural, I think?
> 
> 
> OK, what about SYSIBContainerListEntry ?

Sounds fine!

> 
>>
>>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
>>> +
>>> +/* CPU type Topology List Entry */
>>> +typedef struct SysIBTl_cpu {
>>> +        uint8_t nl;
>>> +        uint8_t reserved0[3];
>>> +#define SYSIB_TLE_POLARITY_MASK 0x03
>>> +#define SYSIB_TLE_DEDICATED     0x04
>>> +        uint8_t flags;
>>> +        uint8_t type;
>>> +        uint16_t origin;
>>> +        uint64_t mask;
>>> +} SysIBTl_cpu;
>>
>> dito, maybe better SysIBTlCpu ?
> 
> 
> What about SysIBCPUListEntry ?

Ack.

  Thomas
Nina Schoetterl-Glausch July 25, 2023, 3:41 p.m. UTC | #5
On Fri, 2023-06-30 at 11:17 +0200, Pierre Morel wrote:
> On interception of STSI(15.1.x) the System Information Block
> (SYSIB) is built from the list of pre-ordered topology entries.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> ---
>  MAINTAINERS                      |   1 +
>  qapi/machine-target.json         |  14 ++
>  include/hw/s390x/cpu-topology.h  |  25 +++
>  include/hw/s390x/sclp.h          |   1 +
>  target/s390x/cpu.h               |  76 ++++++++
>  hw/s390x/cpu-topology.c          |   4 +-
>  target/s390x/kvm/kvm.c           |   5 +-
>  target/s390x/kvm/stsi-topology.c | 310 +++++++++++++++++++++++++++++++
>  target/s390x/kvm/meson.build     |   3 +-
>  9 files changed, 436 insertions(+), 3 deletions(-)
>  create mode 100644 target/s390x/kvm/stsi-topology.c

[...]

>  typedef struct S390Topology {
>      uint8_t *cores_per_socket;
> +    bool polarization;

You don't use this as a bool and since it's no longer called
vertical_polarization, it's not longer entirely clear what the value
means so I think this should be a CpuS390Polarization.
That also makes the assignment in patch 12 clearer since it assigns the
same type.

[...]

>  S390Topology s390_topology = {
>      /* will be initialized after the cpu model is realized */
>      .cores_per_socket = NULL,
> +    .polarization = S390_CPU_POLARIZATION_HORIZONTAL,
>  };

[...]

> +static s390_topology_id s390_topology_from_cpu(S390CPU *cpu)
> +{
> +    s390_topology_id topology_id = {0};
> +
> +    topology_id.drawer = cpu->env.drawer_id;
> +    topology_id.book = cpu->env.book_id;
> +    topology_id.socket = cpu->env.socket_id;
> +    topology_id.origin = cpu->env.core_id / 64;
> +    topology_id.type = S390_TOPOLOGY_CPU_IFL;
> +    topology_id.dedicated = cpu->env.dedicated;
> +
> +    if (s390_topology.polarization == S390_CPU_POLARIZATION_VERTICAL) {
> +        topology_id.entitlement = cpu->env.entitlement;
> +    }
> +
> +    return topology_id;
> +}

[...]
Pierre Morel July 26, 2023, 8:11 a.m. UTC | #6
On 7/25/23 17:41, Nina Schoetterl-Glausch wrote:
> On Fri, 2023-06-30 at 11:17 +0200, Pierre Morel wrote:
>> On interception of STSI(15.1.x) the System Information Block
>> (SYSIB) is built from the list of pre-ordered topology entries.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>> ---
>>   MAINTAINERS                      |   1 +
>>   qapi/machine-target.json         |  14 ++
>>   include/hw/s390x/cpu-topology.h  |  25 +++
>>   include/hw/s390x/sclp.h          |   1 +
>>   target/s390x/cpu.h               |  76 ++++++++
>>   hw/s390x/cpu-topology.c          |   4 +-
>>   target/s390x/kvm/kvm.c           |   5 +-
>>   target/s390x/kvm/stsi-topology.c | 310 +++++++++++++++++++++++++++++++
>>   target/s390x/kvm/meson.build     |   3 +-
>>   9 files changed, 436 insertions(+), 3 deletions(-)
>>   create mode 100644 target/s390x/kvm/stsi-topology.c
> [...]
>
>>   typedef struct S390Topology {
>>       uint8_t *cores_per_socket;
>> +    bool polarization;
> You don't use this as a bool and since it's no longer called
> vertical_polarization, it's not longer entirely clear what the value
> means so I think this should be a CpuS390Polarization.
> That also makes the assignment in patch 12 clearer since it assigns the
> same type.
>
> [...]


right, I make the change

Thanks

Pierre
Nina Schoetterl-Glausch July 27, 2023, 5:31 p.m. UTC | #7
On Fri, 2023-06-30 at 11:17 +0200, Pierre Morel wrote:
> On interception of STSI(15.1.x) the System Information Block
> (SYSIB) is built from the list of pre-ordered topology entries.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> ---
>  MAINTAINERS                      |   1 +
>  qapi/machine-target.json         |  14 ++
>  include/hw/s390x/cpu-topology.h  |  25 +++
>  include/hw/s390x/sclp.h          |   1 +
>  target/s390x/cpu.h               |  76 ++++++++
>  hw/s390x/cpu-topology.c          |   4 +-
>  target/s390x/kvm/kvm.c           |   5 +-
>  target/s390x/kvm/stsi-topology.c | 310 +++++++++++++++++++++++++++++++
>  target/s390x/kvm/meson.build     |   3 +-
>  9 files changed, 436 insertions(+), 3 deletions(-)
>  create mode 100644 target/s390x/kvm/stsi-topology.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 0b03ac5a9b..b8d3e8815c 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1702,6 +1702,7 @@ M: Pierre Morel <pmorel@linux.ibm.com>
>  S: Supported
>  F: include/hw/s390x/cpu-topology.h
>  F: hw/s390x/cpu-topology.c
> +F: target/s390x/kvm/stsi-topology.c
>  
>  X86 Machines
>  ------------
> diff --git a/qapi/machine-target.json b/qapi/machine-target.json
> index 3362f8dc3f..8ea4834e63 100644
> --- a/qapi/machine-target.json
> +++ b/qapi/machine-target.json
> @@ -361,3 +361,17 @@
>                     'TARGET_MIPS',
>                     'TARGET_LOONGARCH64',
>                     'TARGET_RISCV' ] } }
> +
> +##
> +# @CpuS390Polarization:
> +#
> +# An enumeration of cpu polarization that can be assumed by a virtual
> +# S390 CPU
> +#
> +# Since: 8.1
> +##
> +{ 'enum': 'CpuS390Polarization',
> +  'prefix': 'S390_CPU_POLARIZATION',
> +  'data': [ 'horizontal', 'vertical' ],
> +    'if': { 'all': [ 'TARGET_S390X' , 'CONFIG_KVM' ] }
> +}
> diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
> index 9164ac00a7..193b33a2fc 100644
> --- a/include/hw/s390x/cpu-topology.h
> +++ b/include/hw/s390x/cpu-topology.h
> @@ -15,10 +15,35 @@
>  #include "hw/boards.h"
>  #include "qapi/qapi-types-machine-target.h"
>  
> +#define S390_TOPOLOGY_CPU_IFL   0x03
> +
> +typedef union s390_topology_id {
> +    uint64_t id;
> +    struct {
> +        uint8_t sentinel;
> +        uint8_t drawer;
> +        uint8_t book;
> +        uint8_t socket;
> +        uint8_t dedicated;
> +        uint8_t entitlement;
> +        uint8_t type;
> +        uint8_t origin;

The order here is not quite right according to the PoP. Type should be
higher, after socket, such that all cpus of the same type in a socket
are stored as a block.
Also entitlement and dedication need to be inverted, e.g such that
dedicated cpus are shown before non dedicated ones.

> +    };
> +} s390_topology_id;
> +

[...]

> +/**
> + * s390_topology_empty_list:
> + *
> + * Clear all entries in the S390Topology list except the sentinel.

The comment is out of date.

> + */
> +static void s390_topology_empty_list(S390TopologyList *topology_list)
> +{
> +    S390TopologyEntry *entry = NULL;
> +    S390TopologyEntry *tmp = NULL;
> +
> +    QTAILQ_FOREACH_SAFE(entry, topology_list, next, tmp) {
> +        QTAILQ_REMOVE(topology_list, entry, next);
> +        g_free(entry);
> +    }
> +}
> +
> +/**
> + * insert_stsi_15_1_x:
> + * cpu: the CPU doing the call for which we set CC
> + * sel2: the selector 2, containing the nested level
> + * addr: Guest logical address of the guest SysIB
> + * ar: the access register number
> + *
> + * Create a list head for the Topology entries and initialize it.
> + * Insert the first entry as a sentinelle.
> + *
> + * Emulate STSI 15.1.x, that is, perform all necessary checks and
> + * fill the SYSIB.
> + * In case the topology description is too long to fit into the SYSIB,
> + * set CC=3 and abort without writing the SYSIB.
> + */
> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, uint8_t ar)
> +{
> +    S390TopologyList topology_list;
> +    S390TopologyEntry *entry;
> +    SysIB sysib = {0};
> +    int length;
> +
> +    if (!s390_has_topology() || sel2 < 2 || sel2 > SCLP_READ_SCP_INFO_MNEST) {
> +        setcc(cpu, 3);
> +        return;
> +    }
> +
> +    QTAILQ_INIT(&topology_list);
> +    entry = g_malloc0(sizeof(S390TopologyEntry));
> +    entry->id.sentinel = 0xff;
> +    QTAILQ_INSERT_HEAD(&topology_list, entry, next);
> +
> +    s390_topology_fill_list_sorted(&topology_list);
> +
> +    length = setup_stsi(&topology_list, &sysib.sysib_151x, sel2);
> +
> +    if (!length) {
> +        setcc(cpu, 3);
> +        return;
> +    }
> +
> +    sysib.sysib_151x.length = cpu_to_be16(length);
> +    s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, length);
> +    setcc(cpu, 0);
> +
> +    s390_topology_empty_list(&topology_list);
> +}
> diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build
> index 37253f75bf..bcf014ba87 100644
> --- a/target/s390x/kvm/meson.build
> +++ b/target/s390x/kvm/meson.build
> @@ -1,6 +1,7 @@
>  
>  s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
> -  'kvm.c'
> +  'kvm.c',
> +  'stsi-topology.c'
>  ), if_false: files(
>    'stubs.c'
>  ))
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 0b03ac5a9b..b8d3e8815c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1702,6 +1702,7 @@  M: Pierre Morel <pmorel@linux.ibm.com>
 S: Supported
 F: include/hw/s390x/cpu-topology.h
 F: hw/s390x/cpu-topology.c
+F: target/s390x/kvm/stsi-topology.c
 
 X86 Machines
 ------------
diff --git a/qapi/machine-target.json b/qapi/machine-target.json
index 3362f8dc3f..8ea4834e63 100644
--- a/qapi/machine-target.json
+++ b/qapi/machine-target.json
@@ -361,3 +361,17 @@ 
                    'TARGET_MIPS',
                    'TARGET_LOONGARCH64',
                    'TARGET_RISCV' ] } }
+
+##
+# @CpuS390Polarization:
+#
+# An enumeration of cpu polarization that can be assumed by a virtual
+# S390 CPU
+#
+# Since: 8.1
+##
+{ 'enum': 'CpuS390Polarization',
+  'prefix': 'S390_CPU_POLARIZATION',
+  'data': [ 'horizontal', 'vertical' ],
+    'if': { 'all': [ 'TARGET_S390X' , 'CONFIG_KVM' ] }
+}
diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
index 9164ac00a7..193b33a2fc 100644
--- a/include/hw/s390x/cpu-topology.h
+++ b/include/hw/s390x/cpu-topology.h
@@ -15,10 +15,35 @@ 
 #include "hw/boards.h"
 #include "qapi/qapi-types-machine-target.h"
 
+#define S390_TOPOLOGY_CPU_IFL   0x03
+
+typedef union s390_topology_id {
+    uint64_t id;
+    struct {
+        uint8_t sentinel;
+        uint8_t drawer;
+        uint8_t book;
+        uint8_t socket;
+        uint8_t dedicated;
+        uint8_t entitlement;
+        uint8_t type;
+        uint8_t origin;
+    };
+} s390_topology_id;
+
+typedef struct S390TopologyEntry {
+    QTAILQ_ENTRY(S390TopologyEntry) next;
+    s390_topology_id id;
+    uint64_t mask;
+} S390TopologyEntry;
+
 typedef struct S390Topology {
     uint8_t *cores_per_socket;
+    bool polarization;
 } S390Topology;
 
+typedef QTAILQ_HEAD(, S390TopologyEntry) S390TopologyList;
+
 #ifdef CONFIG_KVM
 bool s390_has_topology(void);
 void s390_topology_setup_cpu(MachineState *ms, S390CPU *cpu, Error **errp);
diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
index d3ade40a5a..712fd68123 100644
--- a/include/hw/s390x/sclp.h
+++ b/include/hw/s390x/sclp.h
@@ -112,6 +112,7 @@  typedef struct CPUEntry {
 } QEMU_PACKED CPUEntry;
 
 #define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET     128
+#define SCLP_READ_SCP_INFO_MNEST                2
 typedef struct ReadInfo {
     SCCBHeader h;
     uint16_t rnmax;
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 7ebd5e05b6..6e7d041b01 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -569,6 +569,29 @@  typedef struct SysIB_322 {
 } SysIB_322;
 QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
 
+/*
+ * Topology Magnitude fields (MAG) indicates the maximum number of
+ * topology list entries (TLE) at the corresponding nesting level.
+ */
+#define S390_TOPOLOGY_MAG  6
+#define S390_TOPOLOGY_MAG6 0
+#define S390_TOPOLOGY_MAG5 1
+#define S390_TOPOLOGY_MAG4 2
+#define S390_TOPOLOGY_MAG3 3
+#define S390_TOPOLOGY_MAG2 4
+#define S390_TOPOLOGY_MAG1 5
+/* Configuration topology */
+typedef struct SysIB_151x {
+    uint8_t  reserved0[2];
+    uint16_t length;
+    uint8_t  mag[S390_TOPOLOGY_MAG];
+    uint8_t  reserved1;
+    uint8_t  mnest;
+    uint32_t reserved2;
+    char tle[];
+} SysIB_151x;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
+
 typedef union SysIB {
     SysIB_111 sysib_111;
     SysIB_121 sysib_121;
@@ -576,9 +599,62 @@  typedef union SysIB {
     SysIB_221 sysib_221;
     SysIB_222 sysib_222;
     SysIB_322 sysib_322;
+    SysIB_151x sysib_151x;
 } SysIB;
 QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
 
+/*
+ * CPU Topology List provided by STSI with fc=15 provides a list
+ * of two different Topology List Entries (TLE) types to specify
+ * the topology hierarchy.
+ *
+ * - Container Topology List Entry
+ *   Defines a container to contain other Topology List Entries
+ *   of any type, nested containers or CPU.
+ * - CPU Topology List Entry
+ *   Specifies the CPUs position, type, entitlement and polarization
+ *   of the CPUs contained in the last Container TLE.
+ *
+ * There can be theoretically up to five levels of containers, QEMU
+ * uses only three levels, the drawer's, book's and socket's level.
+ *
+ * A container with a nesting level (NL) greater than 1 can only
+ * contain another container of nesting level NL-1.
+ *
+ * A container of nesting level 1 (socket), contains as many CPU TLE
+ * as needed to describe the position and qualities of all CPUs inside
+ * the container.
+ * The qualities of a CPU are polarization, entitlement and type.
+ *
+ * The CPU TLE defines the position of the CPUs of identical qualities
+ * using a 64bits mask which first bit has its offset defined by
+ * the CPU address orgin field of the CPU TLE like in:
+ * CPU address = origin * 64 + bit position within the mask
+ *
+ */
+/* Container type Topology List Entry */
+typedef struct SysIBTl_container {
+        uint8_t nl;
+        uint8_t reserved[6];
+        uint8_t id;
+} SysIBTl_container;
+QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
+
+/* CPU type Topology List Entry */
+typedef struct SysIBTl_cpu {
+        uint8_t nl;
+        uint8_t reserved0[3];
+#define SYSIB_TLE_POLARITY_MASK 0x03
+#define SYSIB_TLE_DEDICATED     0x04
+        uint8_t flags;
+        uint8_t type;
+        uint16_t origin;
+        uint64_t mask;
+} SysIBTl_cpu;
+QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
+
+void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, uint8_t ar);
+
 /* MMU defines */
 #define ASCE_ORIGIN           (~0xfffULL) /* segment table origin             */
 #define ASCE_SUBSPACE         0x200       /* subspace group control           */
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index b163c17f8f..ca1634d0ce 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -29,11 +29,13 @@ 
  * .cores_per_socket: tracks information on the count of cores
  *                    per socket.
  * .smp: keeps track of the machine topology.
- *
+ * .list: queue the topology entries inside which
+ *        we keep the information on the CPU topology.
  */
 S390Topology s390_topology = {
     /* will be initialized after the cpu model is realized */
     .cores_per_socket = NULL,
+    .polarization = S390_CPU_POLARIZATION_HORIZONTAL,
 };
 
 /**
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 3ac7ec9acf..5ea358cbb0 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -1919,9 +1919,12 @@  static int handle_stsi(S390CPU *cpu)
         if (run->s390_stsi.sel1 != 2 || run->s390_stsi.sel2 != 2) {
             return 0;
         }
-        /* Only sysib 3.2.2 needs post-handling for now. */
         insert_stsi_3_2_2(cpu, run->s390_stsi.addr, run->s390_stsi.ar);
         return 0;
+    case 15:
+        insert_stsi_15_1_x(cpu, run->s390_stsi.sel2, run->s390_stsi.addr,
+                           run->s390_stsi.ar);
+        return 0;
     default:
         return 0;
     }
diff --git a/target/s390x/kvm/stsi-topology.c b/target/s390x/kvm/stsi-topology.c
new file mode 100644
index 0000000000..0b789450da
--- /dev/null
+++ b/target/s390x/kvm/stsi-topology.c
@@ -0,0 +1,310 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * QEMU S390x CPU Topology
+ *
+ * Copyright IBM Corp. 2022,2023
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ *
+ */
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "hw/s390x/pv.h"
+#include "hw/sysbus.h"
+#include "hw/s390x/sclp.h"
+#include "hw/s390x/cpu-topology.h"
+
+/**
+ * fill_container:
+ * @p: The address of the container TLE to fill
+ * @level: The level of nesting for this container
+ * @id: The container receives a unique ID inside its own container
+ *
+ * Returns the next free TLE entry.
+ */
+static char *fill_container(char *p, int level, int id)
+{
+    SysIBTl_container *tle = (SysIBTl_container *)p;
+
+    tle->nl = level;
+    tle->id = id;
+    return p + sizeof(*tle);
+}
+
+/**
+ * fill_tle_cpu:
+ * @p: The address of the CPU TLE to fill
+ * @entry: a pointer to the S390TopologyEntry defining this
+ *         CPU container.
+ *
+ * Returns the next free TLE entry.
+ */
+static char *fill_tle_cpu(char *p, S390TopologyEntry *entry)
+{
+    SysIBTl_cpu *tle = (SysIBTl_cpu *)p;
+    s390_topology_id topology_id = entry->id;
+
+    tle->nl = 0;
+    if (topology_id.dedicated) {
+        tle->flags = SYSIB_TLE_DEDICATED;
+    }
+    tle->flags |= topology_id.entitlement;
+    tle->type = topology_id.type;
+    tle->origin = cpu_to_be16(topology_id.origin * 64);
+    tle->mask = cpu_to_be64(entry->mask);
+    return p + sizeof(*tle);
+}
+
+/*
+ * Macro to check that the size of data after increment
+ * will not get bigger than the size of the SysIB.
+ */
+#define SYSIB_GUARD(data, x) do {       \
+        data += x;                      \
+        if (data > sizeof(SysIB)) {    \
+            return 0;                   \
+        }                               \
+    } while (0)
+
+/**
+ * stsi_topology_fill_sysib:
+ * @p: A pointer to the position of the first TLE
+ * @level: The nested level wanted by the guest
+ *
+ * Fill the SYSIB with the topology information as described in
+ * the PoP, nesting containers as appropriate, with the maximum
+ * nesting limited by @level.
+ *
+ * Return value:
+ * On success: the size of the SysIB_15x after being filled with TLE.
+ * On error: 0 in the case we would overrun the end of the SysIB.
+ */
+static int stsi_topology_fill_sysib(S390TopologyList *topology_list,
+                                    char *p, int level)
+{
+    S390TopologyEntry *entry;
+    int last_drawer = -1;
+    int last_book = -1;
+    int last_socket = -1;
+    int drawer_id = 0;
+    int book_id = 0;
+    int socket_id = 0;
+    int n = sizeof(SysIB_151x);
+
+    QTAILQ_FOREACH(entry, topology_list, next) {
+        bool drawer_change = last_drawer != entry->id.drawer;
+        bool book_change = drawer_change || last_book != entry->id.book;
+        bool socket_change = book_change || last_socket != entry->id.socket;
+
+        /* If we reach the sentinel get out */
+        if (entry->id.sentinel) {
+            break;
+        }
+
+        if (level > 3 && drawer_change) {
+            SYSIB_GUARD(n, sizeof(SysIBTl_container));
+            p = fill_container(p, 3, drawer_id++);
+            book_id = 0;
+        }
+        if (level > 2 && book_change) {
+            SYSIB_GUARD(n, sizeof(SysIBTl_container));
+            p = fill_container(p, 2, book_id++);
+            socket_id = 0;
+        }
+        if (socket_change) {
+            SYSIB_GUARD(n, sizeof(SysIBTl_container));
+            p = fill_container(p, 1, socket_id++);
+        }
+
+        SYSIB_GUARD(n, sizeof(SysIBTl_cpu));
+        p = fill_tle_cpu(p, entry);
+        last_drawer = entry->id.drawer;
+        last_book = entry->id.book;
+        last_socket = entry->id.socket;
+    }
+
+    return n;
+}
+
+/**
+ * setup_stsi:
+ * sysib: pointer to a SysIB to be filled with SysIB_151x data
+ * level: Nested level specified by the guest
+ *
+ * Setup the SYSIB for STSI 15.1, the header as well as the description
+ * of the topology.
+ */
+static int setup_stsi(S390TopologyList *topology_list, SysIB_151x *sysib,
+                      int level)
+{
+    sysib->mnest = level;
+    switch (level) {
+    case 4:
+        sysib->mag[S390_TOPOLOGY_MAG4] = current_machine->smp.drawers;
+        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.books;
+        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
+        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
+        break;
+    case 3:
+        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.drawers *
+                                         current_machine->smp.books;
+        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
+        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
+        break;
+    case 2:
+        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.drawers *
+                                         current_machine->smp.books *
+                                         current_machine->smp.sockets;
+        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
+        break;
+    }
+
+    return stsi_topology_fill_sysib(topology_list, sysib->tle, level);
+}
+
+/**
+ * s390_topology_add_cpu_to_entry:
+ * @entry: Topology entry to setup
+ * @cpu: the S390CPU to add
+ *
+ * Set the core bit inside the topology mask and
+ * increments the number of cores for the socket.
+ */
+static void s390_topology_add_cpu_to_entry(S390TopologyEntry *entry,
+                                           S390CPU *cpu)
+{
+    set_bit(63 - (cpu->env.core_id % 64), &entry->mask);
+}
+
+/**
+ * s390_topology_from_cpu:
+ * @cpu: The S390CPU
+ *
+ * Initialize the topology id from the CPU environment.
+ */
+static s390_topology_id s390_topology_from_cpu(S390CPU *cpu)
+{
+    s390_topology_id topology_id = {0};
+
+    topology_id.drawer = cpu->env.drawer_id;
+    topology_id.book = cpu->env.book_id;
+    topology_id.socket = cpu->env.socket_id;
+    topology_id.origin = cpu->env.core_id / 64;
+    topology_id.type = S390_TOPOLOGY_CPU_IFL;
+    topology_id.dedicated = cpu->env.dedicated;
+
+    if (s390_topology.polarization == S390_CPU_POLARIZATION_VERTICAL) {
+        topology_id.entitlement = cpu->env.entitlement;
+    }
+
+    return topology_id;
+}
+
+/**
+ * s390_topology_insert:
+ * @cpu: s390CPU insert.
+ *
+ * Parse the topology list to find if the entry already
+ * exist and add the core in it.
+ * If it does not exist, allocate a new entry and insert
+ * it in the queue from lower id to greater id.
+ */
+static void s390_topology_insert(S390TopologyList *topology_list, S390CPU *cpu)
+{
+    s390_topology_id id = s390_topology_from_cpu(cpu);
+    S390TopologyEntry *entry = NULL;
+    S390TopologyEntry *tmp = NULL;
+
+    QTAILQ_FOREACH(tmp, topology_list, next) {
+        if (id.id == tmp->id.id) {
+            s390_topology_add_cpu_to_entry(tmp, cpu);
+            return;
+        } else if (id.id < tmp->id.id) {
+            entry = g_malloc0(sizeof(S390TopologyEntry));
+            entry->id.id = id.id;
+            s390_topology_add_cpu_to_entry(entry, cpu);
+            QTAILQ_INSERT_BEFORE(tmp, entry, next);
+            return;
+        }
+    }
+}
+
+/**
+ * s390_topology_fill_list_sorted:
+ *
+ * Loop over all CPU and insert it at the right place
+ * inside the TLE entry list.
+ * Fill the S390Topology list with entries according to the order
+ * specified by the PoP.
+ */
+static void s390_topology_fill_list_sorted(S390TopologyList *topology_list)
+{
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        s390_topology_insert(topology_list, S390_CPU(cs));
+    }
+}
+
+/**
+ * s390_topology_empty_list:
+ *
+ * Clear all entries in the S390Topology list except the sentinel.
+ */
+static void s390_topology_empty_list(S390TopologyList *topology_list)
+{
+    S390TopologyEntry *entry = NULL;
+    S390TopologyEntry *tmp = NULL;
+
+    QTAILQ_FOREACH_SAFE(entry, topology_list, next, tmp) {
+        QTAILQ_REMOVE(topology_list, entry, next);
+        g_free(entry);
+    }
+}
+
+/**
+ * insert_stsi_15_1_x:
+ * cpu: the CPU doing the call for which we set CC
+ * sel2: the selector 2, containing the nested level
+ * addr: Guest logical address of the guest SysIB
+ * ar: the access register number
+ *
+ * Create a list head for the Topology entries and initialize it.
+ * Insert the first entry as a sentinelle.
+ *
+ * Emulate STSI 15.1.x, that is, perform all necessary checks and
+ * fill the SYSIB.
+ * In case the topology description is too long to fit into the SYSIB,
+ * set CC=3 and abort without writing the SYSIB.
+ */
+void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, uint8_t ar)
+{
+    S390TopologyList topology_list;
+    S390TopologyEntry *entry;
+    SysIB sysib = {0};
+    int length;
+
+    if (!s390_has_topology() || sel2 < 2 || sel2 > SCLP_READ_SCP_INFO_MNEST) {
+        setcc(cpu, 3);
+        return;
+    }
+
+    QTAILQ_INIT(&topology_list);
+    entry = g_malloc0(sizeof(S390TopologyEntry));
+    entry->id.sentinel = 0xff;
+    QTAILQ_INSERT_HEAD(&topology_list, entry, next);
+
+    s390_topology_fill_list_sorted(&topology_list);
+
+    length = setup_stsi(&topology_list, &sysib.sysib_151x, sel2);
+
+    if (!length) {
+        setcc(cpu, 3);
+        return;
+    }
+
+    sysib.sysib_151x.length = cpu_to_be16(length);
+    s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, length);
+    setcc(cpu, 0);
+
+    s390_topology_empty_list(&topology_list);
+}
diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build
index 37253f75bf..bcf014ba87 100644
--- a/target/s390x/kvm/meson.build
+++ b/target/s390x/kvm/meson.build
@@ -1,6 +1,7 @@ 
 
 s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
-  'kvm.c'
+  'kvm.c',
+  'stsi-topology.c'
 ), if_false: files(
   'stubs.c'
 ))