diff mbox series

[for-6.2,v5,3/5] hw/arm/virt: Add cpu-map to device tree

Message ID 20210805123921.62540-4-wangyanan55@huawei.com
State New
Headers show
Series hw/arm/virt: Introduce cpu topology support | expand

Commit Message

wangyanan (Y) Aug. 5, 2021, 12:39 p.m. UTC
From: Andrew Jones <drjones@redhat.com>

Support device tree CPU topology descriptions.

In accordance with the Devicetree Specification, the Linux Doc
"arm/cpus.yaml" requires that cpus and cpu nodes in the DT are
present. And we have already met the requirement by generating
/cpus/cpu@* nodes for members within ms->smp.cpus. Accordingly,
we should also create subnodes in cpu-map for the present cpus,
each of which relates to a unique cpu node.

The Linux Doc "cpu/cpu-topology.txt" states that the hierarchy
of CPUs in an SMP system is defined through four entities and
they are socket/cluster/core/thread. It is also required that
a socket node's child nodes must be one or more cluster nodes.
Given that currently we are only provided with information of
socket/core/thread, we assume there is one cluster child node
in each socket node when creating cpu-map.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Co-developed-by: Yanan Wang <wangyanan55@huawei.com>
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
---
 hw/arm/virt.c | 59 ++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 49 insertions(+), 10 deletions(-)

Comments

wangyanan (Y) Aug. 17, 2021, 2:10 a.m. UTC | #1
Hi,
On 2021/8/5 20:39, Yanan Wang wrote:
> From: Andrew Jones <drjones@redhat.com>
>
> Support device tree CPU topology descriptions.
>
> In accordance with the Devicetree Specification, the Linux Doc
> "arm/cpus.yaml" requires that cpus and cpu nodes in the DT are
> present. And we have already met the requirement by generating
> /cpus/cpu@* nodes for members within ms->smp.cpus. Accordingly,
> we should also create subnodes in cpu-map for the present cpus,
> each of which relates to an unique cpu node.
>
> The Linux Doc "cpu/cpu-topology.txt" states that the hierarchy
> of CPUs in a SMP system is defined through four entities and
> they are socket/cluster/core/thread. It is also required that
> a socket node's child nodes must be one or more cluster nodes.
> Given that currently we are only provided with information of
> socket/core/thread, we assume there is one cluster child node
> in each socket node when creating cpu-map.
>
> Signed-off-by: Andrew Jones <drjones@redhat.com>
> Co-developed-by: Yanan Wang <wangyanan55@huawei.com>
> Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
> ---
>   hw/arm/virt.c | 59 ++++++++++++++++++++++++++++++++++++++++++---------
>   1 file changed, 49 insertions(+), 10 deletions(-)
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 82f2eba6bd..d1e294be95 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -350,20 +350,21 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
>       int cpu;
>       int addr_cells = 1;
>       const MachineState *ms = MACHINE(vms);
> +    const VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
>       int smp_cpus = ms->smp.cpus;
>   
>       /*
> -     * From Documentation/devicetree/bindings/arm/cpus.txt
> -     *  On ARM v8 64-bit systems value should be set to 2,
> -     *  that corresponds to the MPIDR_EL1 register size.
> -     *  If MPIDR_EL1[63:32] value is equal to 0 on all CPUs
> -     *  in the system, #address-cells can be set to 1, since
> -     *  MPIDR_EL1[63:32] bits are not used for CPUs
> -     *  identification.
> +     * See Linux Documentation/devicetree/bindings/arm/cpus.yaml
> +     * On ARM v8 64-bit systems value should be set to 2,
> +     * that corresponds to the MPIDR_EL1 register size.
> +     * If MPIDR_EL1[63:32] value is equal to 0 on all CPUs
> +     * in the system, #address-cells can be set to 1, since
> +     * MPIDR_EL1[63:32] bits are not used for CPUs
> +     * identification.
>        *
> -     *  Here we actually don't know whether our system is 32- or 64-bit one.
> -     *  The simplest way to go is to examine affinity IDs of all our CPUs. If
> -     *  at least one of them has Aff3 populated, we set #address-cells to 2.
> +     * Here we actually don't know whether our system is 32- or 64-bit one.
> +     * The simplest way to go is to examine affinity IDs of all our CPUs. If
> +     * at least one of them has Aff3 populated, we set #address-cells to 2.
>        */
>       for (cpu = 0; cpu < smp_cpus; cpu++) {
>           ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
> @@ -406,8 +407,46 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
>                   ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
>           }
>   
> +        if (!vmc->no_cpu_topology) {
> +            qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
> +                                  qemu_fdt_alloc_phandle(ms->fdt));
> +        }
> +
>           g_free(nodename);
>       }
> +
> +    if (!vmc->no_cpu_topology) {
> +        /*
> +         * See Linux Documentation/devicetree/bindings/cpu/cpu-topology.txt
> +         * In a SMP system, the hierarchy of CPUs is defined through four
> +         * entities that are used to describe the layout of physical CPUs
> +         * in the system: socket/cluster/core/thread.
> +         */
> +        qemu_fdt_add_subnode(ms->fdt, "/cpus/cpu-map");
> +
> +        for (cpu = smp_cpus - 1; cpu >= 0; cpu--) {
> +            char *cpu_path = g_strdup_printf("/cpus/cpu@%d", cpu);
> +            char *map_path;
> +
> +            if (ms->smp.threads > 1) {
> +                map_path = g_strdup_printf(
> +                    "/cpus/cpu-map/socket%d/cluster0/core%d/thread%d",
> +                    cpu / (ms->smp.cores * ms->smp.threads),
> +                    (cpu / ms->smp.threads) % ms->smp.cores,
> +                    cpu % ms->smp.threads);
It seems that there is some discrepancy between the documentation
(Documentation/devicetree/bindings/cpu/cpu-topology.txt) and the
actual implementation of DT topology parser for ARM64
(function parse_dt_topology() in drivers/base/arch_topology.c).

The doc says the cpu-map node's child nodes can be:
     - one or more cluster nodes or
     - one or more socket nodes in a multi-socket system
which means a cpu-map can be defined as two formats such as:
1) cpu-map
                    socket0
                                 cluster0
                                              core0
                                              core1
                                 cluster1
                                              core0
                                              core1
                    socket1
                                 cluster0
                                              core0
                                              core1
                                 cluster1
                                              core0
                                              core1

2) cpu-map
                    cluster0
                                 cluster0
                                              core0
                                              core1
                                 cluster1
                                              core0
                                              core1
                    cluster1
                                 cluster0
                                              core0
                                              core1
                                 cluster1
                                              core0
                                              core1

But the current parser only assumes that there are nested clusters within
cpu-map and is unaware of sockets; the parser also ignores any information
about the nesting of clusters and presents the scheduler with a flat list of
them. So based on the current parser, we will get 4 packages (sockets) in
total, 2 cores per package, and 1 thread per core from 2), but will get
nothing useful from 1).

I think the ARM64 kernel DT parser should be optimized so that it's
also aware of sockets and can parse both formats of cpu-map. But
before this, I think we still have to build the cpu-map in format 2) if
we hope to describe topology successfully through DT. :)

Thanks,
Yanan
.
> +            } else {
> +                map_path = g_strdup_printf(
> +                    "/cpus/cpu-map/socket%d/cluster0/core%d",
> +                    cpu / ms->smp.cores,
> +                    cpu % ms->smp.cores);
> +            }
> +            qemu_fdt_add_path(ms->fdt, map_path);
> +            qemu_fdt_setprop_phandle(ms->fdt, map_path, "cpu", cpu_path);
> +
> +            g_free(map_path);
> +            g_free(cpu_path);
> +        }
> +    }
>   }
>   
>   static void fdt_add_its_gic_node(VirtMachineState *vms)
Andrew Jones Aug. 17, 2021, 11:51 a.m. UTC | #2
On Tue, Aug 17, 2021 at 10:10:44AM +0800, wangyanan (Y) wrote:
> Hi,
> On 2021/8/5 20:39, Yanan Wang wrote:
> > From: Andrew Jones <drjones@redhat.com>
> > 
> > Support device tree CPU topology descriptions.
> > 
> > In accordance with the Devicetree Specification, the Linux Doc
> > "arm/cpus.yaml" requires that cpus and cpu nodes in the DT are
> > present. And we have already met the requirement by generating
> > /cpus/cpu@* nodes for members within ms->smp.cpus. Accordingly,
> > we should also create subnodes in cpu-map for the present cpus,
> > each of which relates to an unique cpu node.
> > 
> > The Linux Doc "cpu/cpu-topology.txt" states that the hierarchy
> > of CPUs in a SMP system is defined through four entities and
> > they are socket/cluster/core/thread. It is also required that
> > a socket node's child nodes must be one or more cluster nodes.
> > Given that currently we are only provided with information of
> > socket/core/thread, we assume there is one cluster child node
> > in each socket node when creating cpu-map.
> > 
> > Signed-off-by: Andrew Jones <drjones@redhat.com>
> > Co-developed-by: Yanan Wang <wangyanan55@huawei.com>
> > Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
> > ---
> >   hw/arm/virt.c | 59 ++++++++++++++++++++++++++++++++++++++++++---------
> >   1 file changed, 49 insertions(+), 10 deletions(-)
> > 
> > diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> > index 82f2eba6bd..d1e294be95 100644
> > --- a/hw/arm/virt.c
> > +++ b/hw/arm/virt.c
> > @@ -350,20 +350,21 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
> >       int cpu;
> >       int addr_cells = 1;
> >       const MachineState *ms = MACHINE(vms);
> > +    const VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
> >       int smp_cpus = ms->smp.cpus;
> >       /*
> > -     * From Documentation/devicetree/bindings/arm/cpus.txt
> > -     *  On ARM v8 64-bit systems value should be set to 2,
> > -     *  that corresponds to the MPIDR_EL1 register size.
> > -     *  If MPIDR_EL1[63:32] value is equal to 0 on all CPUs
> > -     *  in the system, #address-cells can be set to 1, since
> > -     *  MPIDR_EL1[63:32] bits are not used for CPUs
> > -     *  identification.
> > +     * See Linux Documentation/devicetree/bindings/arm/cpus.yaml
> > +     * On ARM v8 64-bit systems value should be set to 2,
> > +     * that corresponds to the MPIDR_EL1 register size.
> > +     * If MPIDR_EL1[63:32] value is equal to 0 on all CPUs
> > +     * in the system, #address-cells can be set to 1, since
> > +     * MPIDR_EL1[63:32] bits are not used for CPUs
> > +     * identification.
> >        *
> > -     *  Here we actually don't know whether our system is 32- or 64-bit one.
> > -     *  The simplest way to go is to examine affinity IDs of all our CPUs. If
> > -     *  at least one of them has Aff3 populated, we set #address-cells to 2.
> > +     * Here we actually don't know whether our system is 32- or 64-bit one.
> > +     * The simplest way to go is to examine affinity IDs of all our CPUs. If
> > +     * at least one of them has Aff3 populated, we set #address-cells to 2.
> >        */
> >       for (cpu = 0; cpu < smp_cpus; cpu++) {
> >           ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
> > @@ -406,8 +407,46 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
> >                   ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
> >           }
> > +        if (!vmc->no_cpu_topology) {
> > +            qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
> > +                                  qemu_fdt_alloc_phandle(ms->fdt));
> > +        }
> > +
> >           g_free(nodename);
> >       }
> > +
> > +    if (!vmc->no_cpu_topology) {
> > +        /*
> > +         * See Linux Documentation/devicetree/bindings/cpu/cpu-topology.txt
> > +         * In a SMP system, the hierarchy of CPUs is defined through four
> > +         * entities that are used to describe the layout of physical CPUs
> > +         * in the system: socket/cluster/core/thread.
> > +         */
> > +        qemu_fdt_add_subnode(ms->fdt, "/cpus/cpu-map");
> > +
> > +        for (cpu = smp_cpus - 1; cpu >= 0; cpu--) {
> > +            char *cpu_path = g_strdup_printf("/cpus/cpu@%d", cpu);
> > +            char *map_path;
> > +
> > +            if (ms->smp.threads > 1) {
> > +                map_path = g_strdup_printf(
> > +                    "/cpus/cpu-map/socket%d/cluster0/core%d/thread%d",
> > +                    cpu / (ms->smp.cores * ms->smp.threads),
> > +                    (cpu / ms->smp.threads) % ms->smp.cores,
> > +                    cpu % ms->smp.threads);
> It seems that there is some discrepancy between the documentation
> (Documentation/devicetree/bindings/cpu/cpu-topology.txt) and the
> actual implementation of DT topology parser for ARM64
> (function parse_dt_topology() in drivers/base/arch_topology.c).
> 
> The doc says the cpu-map node's child nodes can be:
>     - one or more cluster nodes or
>     - one or more socket nodes in a multi-socket system
> which means a cpu-map can be defined as two formats such as:
> 1) cpu-map
>                    socket0
>                                 cluster0
>                                              core0
>                                              core1
>                                 cluster1
>                                              core0
>                                              core1
>                    socket1
>                                 cluster0
>                                              core0
>                                              core1
>                                 cluster1
>                                              core0
>                                              core1
> 
> 2) cpu-map
>                    cluster0
>                                 cluster0
>                                              core0
>                                              core1
>                                 cluster1
>                                              core0
>                                              core1
>                    cluster1
>                                 cluster0
>                                              core0
>                                              core1
>                                 cluster1
>                                              core0
>                                              core1
> 
> But current parser only assumes that there are nested clusters within
> cpu-map and is unaware of socket, the parser also ignore any information
> about the nesting of clusters and present the scheduler with a flat list of
> them. So based on current parser, we will get 4 packages (sockets) totally,
> 2 cores per package, 1 threads per core from 2), but will get nothing
> useful from 1).
> 
> I think the ARM64 kernel DT parser should be optimized so that it's
> also aware of sockets and can parse both formats of cpu-map. But
> before this, I think we still have to build the cpu-map in format 2) if
> we hope to describe topology successfully through DT. :)

I would stick to implementing this per the documentation and as similarly
to the ACPI version as possible. I doubt there will be many cases where a
mach-virt VM is launched with a complex topology description and with DT
instead of ACPI anyway. And, if there is a use for that, then the Linux
parser needs to be fixed to match the documentation.

Thanks,
drew

> 
> Thanks,
> Yanan
> .
> > +            } else {
> > +                map_path = g_strdup_printf(
> > +                    "/cpus/cpu-map/socket%d/cluster0/core%d",
> > +                    cpu / ms->smp.cores,
> > +                    cpu % ms->smp.cores);
> > +            }
> > +            qemu_fdt_add_path(ms->fdt, map_path);
> > +            qemu_fdt_setprop_phandle(ms->fdt, map_path, "cpu", cpu_path);
> > +
> > +            g_free(map_path);
> > +            g_free(cpu_path);
> > +        }
> > +    }
> >   }
> >   static void fdt_add_its_gic_node(VirtMachineState *vms)
>
wangyanan (Y) Aug. 17, 2021, 1:54 p.m. UTC | #3
On 2021/8/17 19:51, Andrew Jones wrote:
> On Tue, Aug 17, 2021 at 10:10:44AM +0800, wangyanan (Y) wrote:
>> Hi,
>> On 2021/8/5 20:39, Yanan Wang wrote:
>>> From: Andrew Jones <drjones@redhat.com>
>>>
>>> Support device tree CPU topology descriptions.
>>>
>>> In accordance with the Devicetree Specification, the Linux Doc
>>> "arm/cpus.yaml" requires that cpus and cpu nodes in the DT are
>>> present. And we have already met the requirement by generating
>>> /cpus/cpu@* nodes for members within ms->smp.cpus. Accordingly,
>>> we should also create subnodes in cpu-map for the present cpus,
>>> each of which relates to an unique cpu node.
>>>
>>> The Linux Doc "cpu/cpu-topology.txt" states that the hierarchy
>>> of CPUs in a SMP system is defined through four entities and
>>> they are socket/cluster/core/thread. It is also required that
>>> a socket node's child nodes must be one or more cluster nodes.
>>> Given that currently we are only provided with information of
>>> socket/core/thread, we assume there is one cluster child node
>>> in each socket node when creating cpu-map.
>>>
>>> Signed-off-by: Andrew Jones <drjones@redhat.com>
>>> Co-developed-by: Yanan Wang <wangyanan55@huawei.com>
>>> Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
>>> ---
>>>    hw/arm/virt.c | 59 ++++++++++++++++++++++++++++++++++++++++++---------
>>>    1 file changed, 49 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>>> index 82f2eba6bd..d1e294be95 100644
>>> --- a/hw/arm/virt.c
>>> +++ b/hw/arm/virt.c
>>> @@ -350,20 +350,21 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
>>>        int cpu;
>>>        int addr_cells = 1;
>>>        const MachineState *ms = MACHINE(vms);
>>> +    const VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
>>>        int smp_cpus = ms->smp.cpus;
>>>        /*
>>> -     * From Documentation/devicetree/bindings/arm/cpus.txt
>>> -     *  On ARM v8 64-bit systems value should be set to 2,
>>> -     *  that corresponds to the MPIDR_EL1 register size.
>>> -     *  If MPIDR_EL1[63:32] value is equal to 0 on all CPUs
>>> -     *  in the system, #address-cells can be set to 1, since
>>> -     *  MPIDR_EL1[63:32] bits are not used for CPUs
>>> -     *  identification.
>>> +     * See Linux Documentation/devicetree/bindings/arm/cpus.yaml
>>> +     * On ARM v8 64-bit systems value should be set to 2,
>>> +     * that corresponds to the MPIDR_EL1 register size.
>>> +     * If MPIDR_EL1[63:32] value is equal to 0 on all CPUs
>>> +     * in the system, #address-cells can be set to 1, since
>>> +     * MPIDR_EL1[63:32] bits are not used for CPUs
>>> +     * identification.
>>>         *
>>> -     *  Here we actually don't know whether our system is 32- or 64-bit one.
>>> -     *  The simplest way to go is to examine affinity IDs of all our CPUs. If
>>> -     *  at least one of them has Aff3 populated, we set #address-cells to 2.
>>> +     * Here we actually don't know whether our system is 32- or 64-bit one.
>>> +     * The simplest way to go is to examine affinity IDs of all our CPUs. If
>>> +     * at least one of them has Aff3 populated, we set #address-cells to 2.
>>>         */
>>>        for (cpu = 0; cpu < smp_cpus; cpu++) {
>>>            ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
>>> @@ -406,8 +407,46 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
>>>                    ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
>>>            }
>>> +        if (!vmc->no_cpu_topology) {
>>> +            qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
>>> +                                  qemu_fdt_alloc_phandle(ms->fdt));
>>> +        }
>>> +
>>>            g_free(nodename);
>>>        }
>>> +
>>> +    if (!vmc->no_cpu_topology) {
>>> +        /*
>>> +         * See Linux Documentation/devicetree/bindings/cpu/cpu-topology.txt
>>> +         * In a SMP system, the hierarchy of CPUs is defined through four
>>> +         * entities that are used to describe the layout of physical CPUs
>>> +         * in the system: socket/cluster/core/thread.
>>> +         */
>>> +        qemu_fdt_add_subnode(ms->fdt, "/cpus/cpu-map");
>>> +
>>> +        for (cpu = smp_cpus - 1; cpu >= 0; cpu--) {
>>> +            char *cpu_path = g_strdup_printf("/cpus/cpu@%d", cpu);
>>> +            char *map_path;
>>> +
>>> +            if (ms->smp.threads > 1) {
>>> +                map_path = g_strdup_printf(
>>> +                    "/cpus/cpu-map/socket%d/cluster0/core%d/thread%d",
>>> +                    cpu / (ms->smp.cores * ms->smp.threads),
>>> +                    (cpu / ms->smp.threads) % ms->smp.cores,
>>> +                    cpu % ms->smp.threads);
>> It seems that there is some discrepancy between the documentation
>> (Documentation/devicetree/bindings/cpu/cpu-topology.txt) and the
>> actual implementation of DT topology parser for ARM64
>> (function parse_dt_topology() in drivers/base/arch_topology.c).
>>
>> The doc says the cpu-map node's child nodes can be:
>>      - one or more cluster nodes or
>>      - one or more socket nodes in a multi-socket system
>> which means a cpu-map can be defined as two formats such as:
>> 1) cpu-map
>>                     socket0
>>                                  cluster0
>>                                               core0
>>                                               core1
>>                                  cluster1
>>                                               core0
>>                                               core1
>>                     socket1
>>                                  cluster0
>>                                               core0
>>                                               core1
>>                                  cluster1
>>                                               core0
>>                                               core1
>>
>> 2) cpu-map
>>                     cluster0
>>                                  cluster0
>>                                               core0
>>                                               core1
>>                                  cluster1
>>                                               core0
>>                                               core1
>>                     cluster1
>>                                  cluster0
>>                                               core0
>>                                               core1
>>                                  cluster1
>>                                               core0
>>                                               core1
>>
>> But current parser only assumes that there are nested clusters within
>> cpu-map and is unaware of socket, the parser also ignore any information
>> about the nesting of clusters and present the scheduler with a flat list of
>> them. So based on current parser, we will get 4 packages (sockets) totally,
>> 2 cores per package, 1 threads per core from 2), but will get nothing
>> useful from 1).
>>
>> I think the ARM64 kernel DT parser should be optimized so that it's
>> also aware of sockets and can parse both formats of cpu-map. But
>> before this, I think we still have to build the cpu-map in format 2) if
>> we hope to describe topology successfully through DT. :)
> I would stick to implementing this per the documentation and as similarly
> to the ACPI version as possible. I doubt there will be many cases where a
> mach-virt VM is launched with a complex topology description and with DT
> instead of ACPI anyway. And, if there is a use for that, then the Linux
> parser needs to be fixed to match the documentation.
Thanks for the suggestion!

I agree at this point. We may need to keep consistency with the Doc.
The guest kernel should try to ensure it can parse both cpu-map
formats and should also accept the result if it cannot get anything
useful from a well-defined cpu-map.

Thanks,
Yanan
.
> Thanks,
> drew
>
>> Thanks,
>> Yanan
>> .
>>> +            } else {
>>> +                map_path = g_strdup_printf(
>>> +                    "/cpus/cpu-map/socket%d/cluster0/core%d",
>>> +                    cpu / ms->smp.cores,
>>> +                    cpu % ms->smp.cores);
>>> +            }
>>> +            qemu_fdt_add_path(ms->fdt, map_path);
>>> +            qemu_fdt_setprop_phandle(ms->fdt, map_path, "cpu", cpu_path);
>>> +
>>> +            g_free(map_path);
>>> +            g_free(cpu_path);
>>> +        }
>>> +    }
>>>    }
>>>    static void fdt_add_its_gic_node(VirtMachineState *vms)
> .
diff mbox series

Patch

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 82f2eba6bd..d1e294be95 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -350,20 +350,21 @@  static void fdt_add_cpu_nodes(const VirtMachineState *vms)
     int cpu;
     int addr_cells = 1;
     const MachineState *ms = MACHINE(vms);
+    const VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
     int smp_cpus = ms->smp.cpus;
 
     /*
-     * From Documentation/devicetree/bindings/arm/cpus.txt
-     *  On ARM v8 64-bit systems value should be set to 2,
-     *  that corresponds to the MPIDR_EL1 register size.
-     *  If MPIDR_EL1[63:32] value is equal to 0 on all CPUs
-     *  in the system, #address-cells can be set to 1, since
-     *  MPIDR_EL1[63:32] bits are not used for CPUs
-     *  identification.
+     * See Linux Documentation/devicetree/bindings/arm/cpus.yaml
+     * On ARM v8 64-bit systems value should be set to 2,
+     * that corresponds to the MPIDR_EL1 register size.
+     * If MPIDR_EL1[63:32] value is equal to 0 on all CPUs
+     * in the system, #address-cells can be set to 1, since
+     * MPIDR_EL1[63:32] bits are not used for CPUs
+     * identification.
      *
-     *  Here we actually don't know whether our system is 32- or 64-bit one.
-     *  The simplest way to go is to examine affinity IDs of all our CPUs. If
-     *  at least one of them has Aff3 populated, we set #address-cells to 2.
+     * Here we actually don't know whether our system is 32- or 64-bit one.
+     * The simplest way to go is to examine affinity IDs of all our CPUs. If
+     * at least one of them has Aff3 populated, we set #address-cells to 2.
      */
     for (cpu = 0; cpu < smp_cpus; cpu++) {
         ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
@@ -406,8 +407,46 @@  static void fdt_add_cpu_nodes(const VirtMachineState *vms)
                 ms->possible_cpus->cpus[cs->cpu_index].props.node_id);
         }
 
+        if (!vmc->no_cpu_topology) {
+            qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
+                                  qemu_fdt_alloc_phandle(ms->fdt));
+        }
+
         g_free(nodename);
     }
+
+    if (!vmc->no_cpu_topology) {
+        /*
+         * See Linux Documentation/devicetree/bindings/cpu/cpu-topology.txt
+         * In a SMP system, the hierarchy of CPUs is defined through four
+         * entities that are used to describe the layout of physical CPUs
+         * in the system: socket/cluster/core/thread.
+         */
+        qemu_fdt_add_subnode(ms->fdt, "/cpus/cpu-map");
+
+        for (cpu = smp_cpus - 1; cpu >= 0; cpu--) {
+            char *cpu_path = g_strdup_printf("/cpus/cpu@%d", cpu);
+            char *map_path;
+
+            if (ms->smp.threads > 1) {
+                map_path = g_strdup_printf(
+                    "/cpus/cpu-map/socket%d/cluster0/core%d/thread%d",
+                    cpu / (ms->smp.cores * ms->smp.threads),
+                    (cpu / ms->smp.threads) % ms->smp.cores,
+                    cpu % ms->smp.threads);
+            } else {
+                map_path = g_strdup_printf(
+                    "/cpus/cpu-map/socket%d/cluster0/core%d",
+                    cpu / ms->smp.cores,
+                    cpu % ms->smp.cores);
+            }
+            qemu_fdt_add_path(ms->fdt, map_path);
+            qemu_fdt_setprop_phandle(ms->fdt, map_path, "cpu", cpu_path);
+
+            g_free(map_path);
+            g_free(cpu_path);
+        }
+    }
 }
 
 static void fdt_add_its_gic_node(VirtMachineState *vms)