Message ID | 20230801103527.397756-9-zhao1.liu@linux.intel.com |
---|---|
State | New |
Headers | show |
Series | Support smp.clusters for x86 | expand |
Hi Zhao, On 8/1/23 05:35, Zhao Liu wrote: > From: Zhuocheng Ding <zhuocheng.ding@intel.com> > > Support module level in i386 cpu topology structure "X86CPUTopoInfo". > > Since x86 does not yet support the "clusters" parameter in "-smp", > X86CPUTopoInfo.modules_per_die is currently always 1. Therefore, the > module level width in APIC ID, which can be calculated by > "apicid_bitwidth_for_count(topo_info->modules_per_die)", is always 0 > for now, so we can directly add APIC ID related helpers to support > module level parsing. > > At present, we don't expose module level in CPUID.1FH because currently > linux (v6.4-rc1) doesn't support module level. And exposing module and > die levels at the same time in CPUID.1FH will cause linux to calculate > the wrong die_id. The module level should be exposed until the real > machine has the module level in CPUID.1FH. > > In addition, update topology structure in test-x86-topo.c. > > Signed-off-by: Zhuocheng Ding <zhuocheng.ding@intel.com> > Co-developed-by: Zhao Liu <zhao1.liu@intel.com> > Signed-off-by: Zhao Liu <zhao1.liu@intel.com> > Acked-by: Michael S. Tsirkin <mst@redhat.com> > --- > Changes since v1: > * Include module level related helpers (apicid_module_width() and > apicid_module_offset()) in this patch. (Yanan) > --- > hw/i386/x86.c | 3 ++- > include/hw/i386/topology.h | 22 +++++++++++++++---- > target/i386/cpu.c | 12 ++++++---- > tests/unit/test-x86-topo.c | 45 ++++++++++++++++++++------------------ > 4 files changed, 52 insertions(+), 30 deletions(-) > > diff --git a/hw/i386/x86.c b/hw/i386/x86.c > index 4efc390905ff..a552ae8bb4a8 100644 > --- a/hw/i386/x86.c > +++ b/hw/i386/x86.c > @@ -72,7 +72,8 @@ static void init_topo_info(X86CPUTopoInfo *topo_info, > MachineState *ms = MACHINE(x86ms); > > topo_info->dies_per_pkg = ms->smp.dies; > - topo_info->cores_per_die = ms->smp.cores; > + topo_info->modules_per_die = ms->smp.clusters; It is confusing. 
You said in the previous patch that using clusters for x86 is going to cause compatibility issues. Why is clusters used to initialize modules_per_die? Why not define a new field "modules" (just like clusters) in smp and use it for x86? Is it going to be a problem? Maybe I am not clear here. I am yet to understand all the other changes. Thanks Babu > + topo_info->cores_per_module = ms->smp.cores; > topo_info->threads_per_core = ms->smp.threads; > } > > diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h > index 5a19679f618b..c807d3811dd3 100644 > --- a/include/hw/i386/topology.h > +++ b/include/hw/i386/topology.h > @@ -56,7 +56,8 @@ typedef struct X86CPUTopoIDs { > > typedef struct X86CPUTopoInfo { > unsigned dies_per_pkg; > - unsigned cores_per_die; > + unsigned modules_per_die; > + unsigned cores_per_module; > unsigned threads_per_core; > } X86CPUTopoInfo; > > @@ -77,7 +78,13 @@ static inline unsigned apicid_smt_width(X86CPUTopoInfo *topo_info) > /* Bit width of the Core_ID field */ > static inline unsigned apicid_core_width(X86CPUTopoInfo *topo_info) > { > - return apicid_bitwidth_for_count(topo_info->cores_per_die); > + return apicid_bitwidth_for_count(topo_info->cores_per_module); > +} > + > +/* Bit width of the Module_ID (cluster ID) field */ > +static inline unsigned apicid_module_width(X86CPUTopoInfo *topo_info) > +{ > + return apicid_bitwidth_for_count(topo_info->modules_per_die); > } > > /* Bit width of the Die_ID field */ > @@ -92,10 +99,16 @@ static inline unsigned apicid_core_offset(X86CPUTopoInfo *topo_info) > return apicid_smt_width(topo_info); > } > > +/* Bit offset of the Module_ID (cluster ID) field */ > +static inline unsigned apicid_module_offset(X86CPUTopoInfo *topo_info) > +{ > + return apicid_core_offset(topo_info) + apicid_core_width(topo_info); > +} > + > /* Bit offset of the Die_ID field */ > static inline unsigned apicid_die_offset(X86CPUTopoInfo *topo_info) > { > - return apicid_core_offset(topo_info) + 
apicid_core_width(topo_info); > + return apicid_module_offset(topo_info) + apicid_module_width(topo_info); > } > > /* Bit offset of the Pkg_ID (socket ID) field */ > @@ -127,7 +140,8 @@ static inline void x86_topo_ids_from_idx(X86CPUTopoInfo *topo_info, > X86CPUTopoIDs *topo_ids) > { > unsigned nr_dies = topo_info->dies_per_pkg; > - unsigned nr_cores = topo_info->cores_per_die; > + unsigned nr_cores = topo_info->cores_per_module * > + topo_info->modules_per_die; > unsigned nr_threads = topo_info->threads_per_core; > > topo_ids->pkg_id = cpu_index / (nr_dies * nr_cores * nr_threads); > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > index 8a9fd5682efc..d6969813ee02 100644 > --- a/target/i386/cpu.c > +++ b/target/i386/cpu.c > @@ -339,7 +339,9 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, > > /* L3 is shared among multiple cores */ > if (cache->level == 3) { > - l3_threads = topo_info->cores_per_die * topo_info->threads_per_core; > + l3_threads = topo_info->modules_per_die * > + topo_info->cores_per_module * > + topo_info->threads_per_core; > *eax |= (l3_threads - 1) << 14; > } else { > *eax |= ((topo_info->threads_per_core - 1) << 14); > @@ -6012,10 +6014,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, > uint32_t cpus_per_pkg; > > topo_info.dies_per_pkg = env->nr_dies; > - topo_info.cores_per_die = cs->nr_cores / env->nr_dies; > + topo_info.modules_per_die = env->nr_modules; > + topo_info.cores_per_module = cs->nr_cores / env->nr_dies / env->nr_modules; > topo_info.threads_per_core = cs->nr_threads; > > - cores_per_pkg = topo_info.cores_per_die * topo_info.dies_per_pkg; > + cores_per_pkg = topo_info.cores_per_module * topo_info.modules_per_die * > + topo_info.dies_per_pkg; > cpus_per_pkg = cores_per_pkg * topo_info.threads_per_core; > > /* Calculate & apply limits for different index ranges */ > @@ -6286,7 +6290,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, > break; > case 1: > *eax = 
apicid_die_offset(&topo_info); > - *ebx = topo_info.cores_per_die * topo_info.threads_per_core; > + *ebx = cpus_per_pkg / topo_info.dies_per_pkg; > *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; > break; > case 2: > diff --git a/tests/unit/test-x86-topo.c b/tests/unit/test-x86-topo.c > index 2b104f86d7c2..f21b8a5d95c2 100644 > --- a/tests/unit/test-x86-topo.c > +++ b/tests/unit/test-x86-topo.c > @@ -30,13 +30,16 @@ static void test_topo_bits(void) > { > X86CPUTopoInfo topo_info = {0}; > > - /* simple tests for 1 thread per core, 1 core per die, 1 die per package */ > - topo_info = (X86CPUTopoInfo) {1, 1, 1}; > + /* > + * simple tests for 1 thread per core, 1 core per module, > + * 1 module per die, 1 die per package > + */ > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 1}; > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 0); > g_assert_cmpuint(apicid_core_width(&topo_info), ==, 0); > g_assert_cmpuint(apicid_die_width(&topo_info), ==, 0); > > - topo_info = (X86CPUTopoInfo) {1, 1, 1}; > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 1}; > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 0), ==, 0); > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 1), ==, 1); > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 2), ==, 2); > @@ -45,39 +48,39 @@ static void test_topo_bits(void) > > /* Test field width calculation for multiple values > */ > - topo_info = (X86CPUTopoInfo) {1, 1, 2}; > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 2}; > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 1); > - topo_info = (X86CPUTopoInfo) {1, 1, 3}; > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 3}; > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 2); > - topo_info = (X86CPUTopoInfo) {1, 1, 4}; > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 4}; > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 2); > > - topo_info = (X86CPUTopoInfo) {1, 1, 14}; > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 14}; > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 4); > - topo_info = (X86CPUTopoInfo) {1, 1, 15}; > + 
topo_info = (X86CPUTopoInfo) {1, 1, 1, 15}; > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 4); > - topo_info = (X86CPUTopoInfo) {1, 1, 16}; > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 16}; > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 4); > - topo_info = (X86CPUTopoInfo) {1, 1, 17}; > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 17}; > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 5); > > > - topo_info = (X86CPUTopoInfo) {1, 30, 2}; > + topo_info = (X86CPUTopoInfo) {1, 1, 30, 2}; > g_assert_cmpuint(apicid_core_width(&topo_info), ==, 5); > - topo_info = (X86CPUTopoInfo) {1, 31, 2}; > + topo_info = (X86CPUTopoInfo) {1, 1, 31, 2}; > g_assert_cmpuint(apicid_core_width(&topo_info), ==, 5); > - topo_info = (X86CPUTopoInfo) {1, 32, 2}; > + topo_info = (X86CPUTopoInfo) {1, 1, 32, 2}; > g_assert_cmpuint(apicid_core_width(&topo_info), ==, 5); > - topo_info = (X86CPUTopoInfo) {1, 33, 2}; > + topo_info = (X86CPUTopoInfo) {1, 1, 33, 2}; > g_assert_cmpuint(apicid_core_width(&topo_info), ==, 6); > > - topo_info = (X86CPUTopoInfo) {1, 30, 2}; > + topo_info = (X86CPUTopoInfo) {1, 1, 30, 2}; > g_assert_cmpuint(apicid_die_width(&topo_info), ==, 0); > - topo_info = (X86CPUTopoInfo) {2, 30, 2}; > + topo_info = (X86CPUTopoInfo) {2, 1, 30, 2}; > g_assert_cmpuint(apicid_die_width(&topo_info), ==, 1); > - topo_info = (X86CPUTopoInfo) {3, 30, 2}; > + topo_info = (X86CPUTopoInfo) {3, 1, 30, 2}; > g_assert_cmpuint(apicid_die_width(&topo_info), ==, 2); > - topo_info = (X86CPUTopoInfo) {4, 30, 2}; > + topo_info = (X86CPUTopoInfo) {4, 1, 30, 2}; > g_assert_cmpuint(apicid_die_width(&topo_info), ==, 2); > > /* build a weird topology and see if IDs are calculated correctly > @@ -85,18 +88,18 @@ static void test_topo_bits(void) > > /* This will use 2 bits for thread ID and 3 bits for core ID > */ > - topo_info = (X86CPUTopoInfo) {1, 6, 3}; > + topo_info = (X86CPUTopoInfo) {1, 1, 6, 3}; > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 2); > 
g_assert_cmpuint(apicid_core_offset(&topo_info), ==, 2); > g_assert_cmpuint(apicid_die_offset(&topo_info), ==, 5); > g_assert_cmpuint(apicid_pkg_offset(&topo_info), ==, 5); > > - topo_info = (X86CPUTopoInfo) {1, 6, 3}; > + topo_info = (X86CPUTopoInfo) {1, 1, 6, 3}; > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 0), ==, 0); > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 1), ==, 1); > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 2), ==, 2); > > - topo_info = (X86CPUTopoInfo) {1, 6, 3}; > + topo_info = (X86CPUTopoInfo) {1, 1, 6, 3}; > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 1 * 3 + 0), ==, > (1 << 2) | 0); > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 1 * 3 + 1), ==,
Hi Babu, On Wed, Aug 02, 2023 at 12:25:07PM -0500, Moger, Babu wrote: > Date: Wed, 2 Aug 2023 12:25:07 -0500 > From: "Moger, Babu" <babu.moger@amd.com> > Subject: Re: [PATCH v3 08/17] i386: Support modules_per_die in > X86CPUTopoInfo > > Hi Zhao, > > On 8/1/23 05:35, Zhao Liu wrote: > > From: Zhuocheng Ding <zhuocheng.ding@intel.com> > > > > Support module level in i386 cpu topology structure "X86CPUTopoInfo". > > > > Since x86 does not yet support the "clusters" parameter in "-smp", > > X86CPUTopoInfo.modules_per_die is currently always 1. Therefore, the > > module level width in APIC ID, which can be calculated by > > "apicid_bitwidth_for_count(topo_info->modules_per_die)", is always 0 > > for now, so we can directly add APIC ID related helpers to support > > module level parsing. > > > > At present, we don't expose module level in CPUID.1FH because currently > > linux (v6.4-rc1) doesn't support module level. And exposing module and > > die levels at the same time in CPUID.1FH will cause linux to calculate > > the wrong die_id. The module level should be exposed until the real > > machine has the module level in CPUID.1FH. > > > > In addition, update topology structure in test-x86-topo.c. > > > > Signed-off-by: Zhuocheng Ding <zhuocheng.ding@intel.com> > > Co-developed-by: Zhao Liu <zhao1.liu@intel.com> > > Signed-off-by: Zhao Liu <zhao1.liu@intel.com> > > Acked-by: Michael S. Tsirkin <mst@redhat.com> > > --- > > Changes since v1: > > * Include module level related helpers (apicid_module_width() and > > apicid_module_offset()) in this patch. 
(Yanan) > > --- > > hw/i386/x86.c | 3 ++- > > include/hw/i386/topology.h | 22 +++++++++++++++---- > > target/i386/cpu.c | 12 ++++++---- > > tests/unit/test-x86-topo.c | 45 ++++++++++++++++++++------------------ > > 4 files changed, 52 insertions(+), 30 deletions(-) > > > > diff --git a/hw/i386/x86.c b/hw/i386/x86.c > > index 4efc390905ff..a552ae8bb4a8 100644 > > --- a/hw/i386/x86.c > > +++ b/hw/i386/x86.c > > @@ -72,7 +72,8 @@ static void init_topo_info(X86CPUTopoInfo *topo_info, > > MachineState *ms = MACHINE(x86ms); > > > > topo_info->dies_per_pkg = ms->smp.dies; > > - topo_info->cores_per_die = ms->smp.cores; > > + topo_info->modules_per_die = ms->smp.clusters; > > It is confusing. You said in the previous patch, using the clusters for > x86 is going to cause compatibility issues. The compatibility issue means the default L2 cache topology should be "1 L2 cache per core", and we shouldn't change this default setting. If we want "1 L2 cache per module", then we need another way to do this (this is x-l2-cache-topo). Since "cluster" was originally introduced into QEMU to help define the L2 cache topology, I explained that we can't just change the default topology level of L2. > Why is this clusters is used to initialize modules_per_die? "cluster" vs. "module" is just like "socket" vs. "package". The former is the generic name in smp code, while the latter is the more accurate naming in the i386 context. > > Why not define a new field "modules"(just like clusters) in smp and use it > x86? Is is going to a problem? In this case (just adding a new "module" in smp), the "cluster" parameter of smp would not be useful for i386, and different architectures would have different parameters for smp, which is not general enough. I think it's clearest to have a common topology hierarchy in QEMU. cluster was originally introduced to QEMU by arm. From Yanan's explanation [1], it is a CPU topology level, above the core level, and L2 is often shared at this level as well. 
This description is very similar to i386's module, so I think we could align cluster with module instead of introducing a new "module" in smp, just as "socket" in smp corresponds to "package" in i386. [1]: https://patchew.org/QEMU/20211228092221.21068-1-wangyanan55@huawei.com/ > May be I am not clear here. I am yet to understand all the other changes. > I hope my explanation above answers your question. Thanks, Zhao > Thanks > Babu > > > + topo_info->cores_per_module = ms->smp.cores; > > topo_info->threads_per_core = ms->smp.threads; > > } > > > > diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h > > index 5a19679f618b..c807d3811dd3 100644 > > --- a/include/hw/i386/topology.h > > +++ b/include/hw/i386/topology.h > > @@ -56,7 +56,8 @@ typedef struct X86CPUTopoIDs { > > > > typedef struct X86CPUTopoInfo { > > unsigned dies_per_pkg; > > - unsigned cores_per_die; > > + unsigned modules_per_die; > > + unsigned cores_per_module; > > unsigned threads_per_core; > > } X86CPUTopoInfo; > > > > @@ -77,7 +78,13 @@ static inline unsigned apicid_smt_width(X86CPUTopoInfo *topo_info) > > /* Bit width of the Core_ID field */ > > static inline unsigned apicid_core_width(X86CPUTopoInfo *topo_info) > > { > > - return apicid_bitwidth_for_count(topo_info->cores_per_die); > > + return apicid_bitwidth_for_count(topo_info->cores_per_module); > > +} > > + > > +/* Bit width of the Module_ID (cluster ID) field */ > > +static inline unsigned apicid_module_width(X86CPUTopoInfo *topo_info) > > +{ > > + return apicid_bitwidth_for_count(topo_info->modules_per_die); > > } > > > > /* Bit width of the Die_ID field */ > > @@ -92,10 +99,16 @@ static inline unsigned apicid_core_offset(X86CPUTopoInfo *topo_info) > > return apicid_smt_width(topo_info); > > } > > > > +/* Bit offset of the Module_ID (cluster ID) field */ > > +static inline unsigned apicid_module_offset(X86CPUTopoInfo *topo_info) > > +{ > > + return apicid_core_offset(topo_info) + apicid_core_width(topo_info); > > +} > > 
+ > > /* Bit offset of the Die_ID field */ > > static inline unsigned apicid_die_offset(X86CPUTopoInfo *topo_info) > > { > > - return apicid_core_offset(topo_info) + apicid_core_width(topo_info); > > + return apicid_module_offset(topo_info) + apicid_module_width(topo_info); > > } > > > > /* Bit offset of the Pkg_ID (socket ID) field */ > > @@ -127,7 +140,8 @@ static inline void x86_topo_ids_from_idx(X86CPUTopoInfo *topo_info, > > X86CPUTopoIDs *topo_ids) > > { > > unsigned nr_dies = topo_info->dies_per_pkg; > > - unsigned nr_cores = topo_info->cores_per_die; > > + unsigned nr_cores = topo_info->cores_per_module * > > + topo_info->modules_per_die; > > unsigned nr_threads = topo_info->threads_per_core; > > > > topo_ids->pkg_id = cpu_index / (nr_dies * nr_cores * nr_threads); > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > > index 8a9fd5682efc..d6969813ee02 100644 > > --- a/target/i386/cpu.c > > +++ b/target/i386/cpu.c > > @@ -339,7 +339,9 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, > > > > /* L3 is shared among multiple cores */ > > if (cache->level == 3) { > > - l3_threads = topo_info->cores_per_die * topo_info->threads_per_core; > > + l3_threads = topo_info->modules_per_die * > > + topo_info->cores_per_module * > > + topo_info->threads_per_core; > > *eax |= (l3_threads - 1) << 14; > > } else { > > *eax |= ((topo_info->threads_per_core - 1) << 14); > > @@ -6012,10 +6014,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, > > uint32_t cpus_per_pkg; > > > > topo_info.dies_per_pkg = env->nr_dies; > > - topo_info.cores_per_die = cs->nr_cores / env->nr_dies; > > + topo_info.modules_per_die = env->nr_modules; > > + topo_info.cores_per_module = cs->nr_cores / env->nr_dies / env->nr_modules; > > topo_info.threads_per_core = cs->nr_threads; > > > > - cores_per_pkg = topo_info.cores_per_die * topo_info.dies_per_pkg; > > + cores_per_pkg = topo_info.cores_per_module * topo_info.modules_per_die * > > + topo_info.dies_per_pkg; > 
> cpus_per_pkg = cores_per_pkg * topo_info.threads_per_core; > > > > /* Calculate & apply limits for different index ranges */ > > @@ -6286,7 +6290,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, > > break; > > case 1: > > *eax = apicid_die_offset(&topo_info); > > - *ebx = topo_info.cores_per_die * topo_info.threads_per_core; > > + *ebx = cpus_per_pkg / topo_info.dies_per_pkg; > > *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; > > break; > > case 2: > > diff --git a/tests/unit/test-x86-topo.c b/tests/unit/test-x86-topo.c > > index 2b104f86d7c2..f21b8a5d95c2 100644 > > --- a/tests/unit/test-x86-topo.c > > +++ b/tests/unit/test-x86-topo.c > > @@ -30,13 +30,16 @@ static void test_topo_bits(void) > > { > > X86CPUTopoInfo topo_info = {0}; > > > > - /* simple tests for 1 thread per core, 1 core per die, 1 die per package */ > > - topo_info = (X86CPUTopoInfo) {1, 1, 1}; > > + /* > > + * simple tests for 1 thread per core, 1 core per module, > > + * 1 module per die, 1 die per package > > + */ > > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 1}; > > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 0); > > g_assert_cmpuint(apicid_core_width(&topo_info), ==, 0); > > g_assert_cmpuint(apicid_die_width(&topo_info), ==, 0); > > > > - topo_info = (X86CPUTopoInfo) {1, 1, 1}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 1}; > > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 0), ==, 0); > > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 1), ==, 1); > > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 2), ==, 2); > > @@ -45,39 +48,39 @@ static void test_topo_bits(void) > > > > /* Test field width calculation for multiple values > > */ > > - topo_info = (X86CPUTopoInfo) {1, 1, 2}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 2}; > > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 1); > > - topo_info = (X86CPUTopoInfo) {1, 1, 3}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 3}; > > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 2); > > - topo_info = 
(X86CPUTopoInfo) {1, 1, 4}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 4}; > > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 2); > > > > - topo_info = (X86CPUTopoInfo) {1, 1, 14}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 14}; > > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 4); > > - topo_info = (X86CPUTopoInfo) {1, 1, 15}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 15}; > > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 4); > > - topo_info = (X86CPUTopoInfo) {1, 1, 16}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 16}; > > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 4); > > - topo_info = (X86CPUTopoInfo) {1, 1, 17}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 1, 17}; > > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 5); > > > > > > - topo_info = (X86CPUTopoInfo) {1, 30, 2}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 30, 2}; > > g_assert_cmpuint(apicid_core_width(&topo_info), ==, 5); > > - topo_info = (X86CPUTopoInfo) {1, 31, 2}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 31, 2}; > > g_assert_cmpuint(apicid_core_width(&topo_info), ==, 5); > > - topo_info = (X86CPUTopoInfo) {1, 32, 2}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 32, 2}; > > g_assert_cmpuint(apicid_core_width(&topo_info), ==, 5); > > - topo_info = (X86CPUTopoInfo) {1, 33, 2}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 33, 2}; > > g_assert_cmpuint(apicid_core_width(&topo_info), ==, 6); > > > > - topo_info = (X86CPUTopoInfo) {1, 30, 2}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 30, 2}; > > g_assert_cmpuint(apicid_die_width(&topo_info), ==, 0); > > - topo_info = (X86CPUTopoInfo) {2, 30, 2}; > > + topo_info = (X86CPUTopoInfo) {2, 1, 30, 2}; > > g_assert_cmpuint(apicid_die_width(&topo_info), ==, 1); > > - topo_info = (X86CPUTopoInfo) {3, 30, 2}; > > + topo_info = (X86CPUTopoInfo) {3, 1, 30, 2}; > > g_assert_cmpuint(apicid_die_width(&topo_info), ==, 2); > > - topo_info = (X86CPUTopoInfo) {4, 30, 2}; > > + topo_info = (X86CPUTopoInfo) {4, 1, 30, 2}; > > 
g_assert_cmpuint(apicid_die_width(&topo_info), ==, 2); > > > > /* build a weird topology and see if IDs are calculated correctly > > @@ -85,18 +88,18 @@ static void test_topo_bits(void) > > > > /* This will use 2 bits for thread ID and 3 bits for core ID > > */ > > - topo_info = (X86CPUTopoInfo) {1, 6, 3}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 6, 3}; > > g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 2); > > g_assert_cmpuint(apicid_core_offset(&topo_info), ==, 2); > > g_assert_cmpuint(apicid_die_offset(&topo_info), ==, 5); > > g_assert_cmpuint(apicid_pkg_offset(&topo_info), ==, 5); > > > > - topo_info = (X86CPUTopoInfo) {1, 6, 3}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 6, 3}; > > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 0), ==, 0); > > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 1), ==, 1); > > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 2), ==, 2); > > > > - topo_info = (X86CPUTopoInfo) {1, 6, 3}; > > + topo_info = (X86CPUTopoInfo) {1, 1, 6, 3}; > > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 1 * 3 + 0), ==, > > (1 << 2) | 0); > > g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 1 * 3 + 1), ==, > > -- > Thanks > Babu Moger
diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 4efc390905ff..a552ae8bb4a8 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -72,7 +72,8 @@ static void init_topo_info(X86CPUTopoInfo *topo_info, MachineState *ms = MACHINE(x86ms); topo_info->dies_per_pkg = ms->smp.dies; - topo_info->cores_per_die = ms->smp.cores; + topo_info->modules_per_die = ms->smp.clusters; + topo_info->cores_per_module = ms->smp.cores; topo_info->threads_per_core = ms->smp.threads; } diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h index 5a19679f618b..c807d3811dd3 100644 --- a/include/hw/i386/topology.h +++ b/include/hw/i386/topology.h @@ -56,7 +56,8 @@ typedef struct X86CPUTopoIDs { typedef struct X86CPUTopoInfo { unsigned dies_per_pkg; - unsigned cores_per_die; + unsigned modules_per_die; + unsigned cores_per_module; unsigned threads_per_core; } X86CPUTopoInfo; @@ -77,7 +78,13 @@ static inline unsigned apicid_smt_width(X86CPUTopoInfo *topo_info) /* Bit width of the Core_ID field */ static inline unsigned apicid_core_width(X86CPUTopoInfo *topo_info) { - return apicid_bitwidth_for_count(topo_info->cores_per_die); + return apicid_bitwidth_for_count(topo_info->cores_per_module); +} + +/* Bit width of the Module_ID (cluster ID) field */ +static inline unsigned apicid_module_width(X86CPUTopoInfo *topo_info) +{ + return apicid_bitwidth_for_count(topo_info->modules_per_die); } /* Bit width of the Die_ID field */ @@ -92,10 +99,16 @@ static inline unsigned apicid_core_offset(X86CPUTopoInfo *topo_info) return apicid_smt_width(topo_info); } +/* Bit offset of the Module_ID (cluster ID) field */ +static inline unsigned apicid_module_offset(X86CPUTopoInfo *topo_info) +{ + return apicid_core_offset(topo_info) + apicid_core_width(topo_info); +} + /* Bit offset of the Die_ID field */ static inline unsigned apicid_die_offset(X86CPUTopoInfo *topo_info) { - return apicid_core_offset(topo_info) + apicid_core_width(topo_info); + return apicid_module_offset(topo_info) + 
apicid_module_width(topo_info); } /* Bit offset of the Pkg_ID (socket ID) field */ @@ -127,7 +140,8 @@ static inline void x86_topo_ids_from_idx(X86CPUTopoInfo *topo_info, X86CPUTopoIDs *topo_ids) { unsigned nr_dies = topo_info->dies_per_pkg; - unsigned nr_cores = topo_info->cores_per_die; + unsigned nr_cores = topo_info->cores_per_module * + topo_info->modules_per_die; unsigned nr_threads = topo_info->threads_per_core; topo_ids->pkg_id = cpu_index / (nr_dies * nr_cores * nr_threads); diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 8a9fd5682efc..d6969813ee02 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -339,7 +339,9 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, /* L3 is shared among multiple cores */ if (cache->level == 3) { - l3_threads = topo_info->cores_per_die * topo_info->threads_per_core; + l3_threads = topo_info->modules_per_die * + topo_info->cores_per_module * + topo_info->threads_per_core; *eax |= (l3_threads - 1) << 14; } else { *eax |= ((topo_info->threads_per_core - 1) << 14); @@ -6012,10 +6014,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, uint32_t cpus_per_pkg; topo_info.dies_per_pkg = env->nr_dies; - topo_info.cores_per_die = cs->nr_cores / env->nr_dies; + topo_info.modules_per_die = env->nr_modules; + topo_info.cores_per_module = cs->nr_cores / env->nr_dies / env->nr_modules; topo_info.threads_per_core = cs->nr_threads; - cores_per_pkg = topo_info.cores_per_die * topo_info.dies_per_pkg; + cores_per_pkg = topo_info.cores_per_module * topo_info.modules_per_die * + topo_info.dies_per_pkg; cpus_per_pkg = cores_per_pkg * topo_info.threads_per_core; /* Calculate & apply limits for different index ranges */ @@ -6286,7 +6290,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 1: *eax = apicid_die_offset(&topo_info); - *ebx = topo_info.cores_per_die * topo_info.threads_per_core; + *ebx = cpus_per_pkg / topo_info.dies_per_pkg; *ecx |= 
CPUID_TOPOLOGY_LEVEL_CORE; break; case 2: diff --git a/tests/unit/test-x86-topo.c b/tests/unit/test-x86-topo.c index 2b104f86d7c2..f21b8a5d95c2 100644 --- a/tests/unit/test-x86-topo.c +++ b/tests/unit/test-x86-topo.c @@ -30,13 +30,16 @@ static void test_topo_bits(void) { X86CPUTopoInfo topo_info = {0}; - /* simple tests for 1 thread per core, 1 core per die, 1 die per package */ - topo_info = (X86CPUTopoInfo) {1, 1, 1}; + /* + * simple tests for 1 thread per core, 1 core per module, + * 1 module per die, 1 die per package + */ + topo_info = (X86CPUTopoInfo) {1, 1, 1, 1}; g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 0); g_assert_cmpuint(apicid_core_width(&topo_info), ==, 0); g_assert_cmpuint(apicid_die_width(&topo_info), ==, 0); - topo_info = (X86CPUTopoInfo) {1, 1, 1}; + topo_info = (X86CPUTopoInfo) {1, 1, 1, 1}; g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 0), ==, 0); g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 1), ==, 1); g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 2), ==, 2); @@ -45,39 +48,39 @@ static void test_topo_bits(void) /* Test field width calculation for multiple values */ - topo_info = (X86CPUTopoInfo) {1, 1, 2}; + topo_info = (X86CPUTopoInfo) {1, 1, 1, 2}; g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 1); - topo_info = (X86CPUTopoInfo) {1, 1, 3}; + topo_info = (X86CPUTopoInfo) {1, 1, 1, 3}; g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 2); - topo_info = (X86CPUTopoInfo) {1, 1, 4}; + topo_info = (X86CPUTopoInfo) {1, 1, 1, 4}; g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 2); - topo_info = (X86CPUTopoInfo) {1, 1, 14}; + topo_info = (X86CPUTopoInfo) {1, 1, 1, 14}; g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 4); - topo_info = (X86CPUTopoInfo) {1, 1, 15}; + topo_info = (X86CPUTopoInfo) {1, 1, 1, 15}; g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 4); - topo_info = (X86CPUTopoInfo) {1, 1, 16}; + topo_info = (X86CPUTopoInfo) {1, 1, 1, 16}; g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 4); - 
topo_info = (X86CPUTopoInfo) {1, 1, 17}; + topo_info = (X86CPUTopoInfo) {1, 1, 1, 17}; g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 5); - topo_info = (X86CPUTopoInfo) {1, 30, 2}; + topo_info = (X86CPUTopoInfo) {1, 1, 30, 2}; g_assert_cmpuint(apicid_core_width(&topo_info), ==, 5); - topo_info = (X86CPUTopoInfo) {1, 31, 2}; + topo_info = (X86CPUTopoInfo) {1, 1, 31, 2}; g_assert_cmpuint(apicid_core_width(&topo_info), ==, 5); - topo_info = (X86CPUTopoInfo) {1, 32, 2}; + topo_info = (X86CPUTopoInfo) {1, 1, 32, 2}; g_assert_cmpuint(apicid_core_width(&topo_info), ==, 5); - topo_info = (X86CPUTopoInfo) {1, 33, 2}; + topo_info = (X86CPUTopoInfo) {1, 1, 33, 2}; g_assert_cmpuint(apicid_core_width(&topo_info), ==, 6); - topo_info = (X86CPUTopoInfo) {1, 30, 2}; + topo_info = (X86CPUTopoInfo) {1, 1, 30, 2}; g_assert_cmpuint(apicid_die_width(&topo_info), ==, 0); - topo_info = (X86CPUTopoInfo) {2, 30, 2}; + topo_info = (X86CPUTopoInfo) {2, 1, 30, 2}; g_assert_cmpuint(apicid_die_width(&topo_info), ==, 1); - topo_info = (X86CPUTopoInfo) {3, 30, 2}; + topo_info = (X86CPUTopoInfo) {3, 1, 30, 2}; g_assert_cmpuint(apicid_die_width(&topo_info), ==, 2); - topo_info = (X86CPUTopoInfo) {4, 30, 2}; + topo_info = (X86CPUTopoInfo) {4, 1, 30, 2}; g_assert_cmpuint(apicid_die_width(&topo_info), ==, 2); /* build a weird topology and see if IDs are calculated correctly @@ -85,18 +88,18 @@ static void test_topo_bits(void) /* This will use 2 bits for thread ID and 3 bits for core ID */ - topo_info = (X86CPUTopoInfo) {1, 6, 3}; + topo_info = (X86CPUTopoInfo) {1, 1, 6, 3}; g_assert_cmpuint(apicid_smt_width(&topo_info), ==, 2); g_assert_cmpuint(apicid_core_offset(&topo_info), ==, 2); g_assert_cmpuint(apicid_die_offset(&topo_info), ==, 5); g_assert_cmpuint(apicid_pkg_offset(&topo_info), ==, 5); - topo_info = (X86CPUTopoInfo) {1, 6, 3}; + topo_info = (X86CPUTopoInfo) {1, 1, 6, 3}; g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 0), ==, 0); g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 
1), ==, 1); g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 2), ==, 2); - topo_info = (X86CPUTopoInfo) {1, 6, 3}; + topo_info = (X86CPUTopoInfo) {1, 1, 6, 3}; g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 1 * 3 + 0), ==, (1 << 2) | 0); g_assert_cmpuint(x86_apicid_from_cpu_idx(&topo_info, 1 * 3 + 1), ==,