diff mbox

[v2,1/2] numa: introduce numa_auto_assign_ram() function in MachineClass

Message ID 20170427101259.13798-2-lvivier@redhat.com
State New
Headers show

Commit Message

Laurent Vivier April 27, 2017, 10:12 a.m. UTC
We need to change the way we distribute the memory across
the nodes. To keep compatibility between machine type version
introduce a  machine type dependent function.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 include/hw/boards.h |  2 ++
 numa.c              | 44 +++++++++++++++++++++++++++++++++-----------
 2 files changed, 35 insertions(+), 11 deletions(-)

Comments

Eduardo Habkost April 27, 2017, 2:09 p.m. UTC | #1
On Thu, Apr 27, 2017 at 12:12:58PM +0200, Laurent Vivier wrote:
> We need to change the way we distribute the memory across
> the nodes. To keep compatibility between machine type version
> introduce a  machine type dependent function.
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
[...]
> +static void numa_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
> +                                 int nb_nodes, ram_addr_t size)
> +{
> +    int i;
> +    uint64_t usedmem = 0;
> +
> +    if (mc->numa_auto_assign_ram) {
> +        uint64_t *mem = g_new(uint64_t, nb_nodes);
> +
> +        mc->numa_auto_assign_ram(mem, nb_nodes, size);
> +
> +        for (i = 0; i < nb_nodes; i++) {
> +            nodes[i].node_mem = mem[i];
> +        }
> +
> +        g_free(mem);
> +
> +        return;
> +    }
> +
> +    /* Align each node according to the alignment
> +     * requirements of the machine class
> +     */
> +
> +    for (i = 0; i < nb_nodes - 1; i++) {
> +        nodes[i].node_mem = (size / nb_nodes) &
> +                            ~((1 << mc->numa_mem_align_shift) - 1);
> +        usedmem += nodes[i].node_mem;
> +    }
> +    nodes[i].node_mem = size - usedmem;
> +}

I would prefer to make your new algorithm the default, and move
this code to a legacy_auto_assign_ram() function set by the 2.9
machine-types.

> +
>  void parse_numa_opts(MachineClass *mc)
>  {
>      int i;
> @@ -336,17 +368,7 @@ void parse_numa_opts(MachineClass *mc)
>              }
>          }
>          if (i == nb_numa_nodes) {
> -            uint64_t usedmem = 0;
> -
> -            /* Align each node according to the alignment
> -             * requirements of the machine class
> -             */
> -            for (i = 0; i < nb_numa_nodes - 1; i++) {
> -                numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
> -                                        ~((1 << mc->numa_mem_align_shift) - 1);
> -                usedmem += numa_info[i].node_mem;
> -            }
> -            numa_info[i].node_mem = ram_size - usedmem;
> +            numa_auto_assign_ram(mc, numa_info, nb_numa_nodes, ram_size);
>          }
>  
>          numa_total = 0;
> -- 
> 2.9.3
>
Laurent Vivier April 27, 2017, 3:09 p.m. UTC | #2
On 27/04/2017 16:09, Eduardo Habkost wrote:
> On Thu, Apr 27, 2017 at 12:12:58PM +0200, Laurent Vivier wrote:
>> We need to change the way we distribute the memory across
>> the nodes. To keep compatibility between machine type version
>> introduce a  machine type dependent function.
>>
>> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> [...]
>> +static void numa_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
>> +                                 int nb_nodes, ram_addr_t size)
>> +{
>> +    int i;
>> +    uint64_t usedmem = 0;
>> +
>> +    if (mc->numa_auto_assign_ram) {
>> +        uint64_t *mem = g_new(uint64_t, nb_nodes);
>> +
>> +        mc->numa_auto_assign_ram(mem, nb_nodes, size);
>> +
>> +        for (i = 0; i < nb_nodes; i++) {
>> +            nodes[i].node_mem = mem[i];
>> +        }
>> +
>> +        g_free(mem);
>> +
>> +        return;
>> +    }
>> +
>> +    /* Align each node according to the alignment
>> +     * requirements of the machine class
>> +     */
>> +
>> +    for (i = 0; i < nb_nodes - 1; i++) {
>> +        nodes[i].node_mem = (size / nb_nodes) &
>> +                            ~((1 << mc->numa_mem_align_shift) - 1);
>> +        usedmem += nodes[i].node_mem;
>> +    }
>> +    nodes[i].node_mem = size - usedmem;
>> +}
> 
> I would prefer to make your new algorithm the default, and move
> this code to a legacy_auto_assign_ram() function set by the 2.9
> machine-types.

I think it's easier to do as I've done because otherwise, we need:

- to add the numa_mem_align_shift to the parameters list of the
  numa_auto_assign_ram() function.

- set the function by default to numa_auto_assign_ram in
  hw/core/machine.c:machine_class_init()

- set the pointer to NULL in 2.10 pseries machine type,

- export the function to re-set the legacy function in the 2.9 pseries
  machine type definition.

So, we need to add one argument to the function, export the function to
use it from machine.c and at least spapr.c, to set the function in
machine_class_init() and spapr_machine_2_9_class_options() (as we clear
it in 2.10 function).

I can do that, but is this what you want?

Thanks,
Laurent
Eduardo Habkost April 27, 2017, 4:09 p.m. UTC | #3
On Thu, Apr 27, 2017 at 05:09:26PM +0200, Laurent Vivier wrote:
> On 27/04/2017 16:09, Eduardo Habkost wrote:
> > On Thu, Apr 27, 2017 at 12:12:58PM +0200, Laurent Vivier wrote:
> >> We need to change the way we distribute the memory across
> >> the nodes. To keep compatibility between machine type version
> >> introduce a  machine type dependent function.
> >>
> >> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> > [...]
> >> +static void numa_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
> >> +                                 int nb_nodes, ram_addr_t size)
> >> +{
> >> +    int i;
> >> +    uint64_t usedmem = 0;
> >> +
> >> +    if (mc->numa_auto_assign_ram) {
> >> +        uint64_t *mem = g_new(uint64_t, nb_nodes);
> >> +
> >> +        mc->numa_auto_assign_ram(mem, nb_nodes, size);
> >> +
> >> +        for (i = 0; i < nb_nodes; i++) {
> >> +            nodes[i].node_mem = mem[i];
> >> +        }
> >> +
> >> +        g_free(mem);
> >> +
> >> +        return;
> >> +    }
> >> +
> >> +    /* Align each node according to the alignment
> >> +     * requirements of the machine class
> >> +     */
> >> +
> >> +    for (i = 0; i < nb_nodes - 1; i++) {
> >> +        nodes[i].node_mem = (size / nb_nodes) &
> >> +                            ~((1 << mc->numa_mem_align_shift) - 1);
> >> +        usedmem += nodes[i].node_mem;
> >> +    }
> >> +    nodes[i].node_mem = size - usedmem;
> >> +}
> > 
> > I would prefer to make your new algorithm the default, and move
> > this code to a legacy_auto_assign_ram() function set by the 2.9
> > machine-types.
> 
> I think it's easier to do as I've done because otherwise, we need:
> 
> - to add the numa_mem_align_shift to the parameters list of the
>   numa_auto_assign_ram() function.

You can take MachineClass or MachineState as paramter.

> 
> - set the function by default to numa_auto_assign_ram in
>   hw/core/machine.c:machine_class_init()

Yep.

> 
> - set the pointer to NULL in 2.10 pseries machine type,

Won't pseries-2.10 have the same behavior as all other machines
except pc/pseries <= 2.9? pseries-2.10 and pc-2.10 would just
reuse the default value set by machine_class_init().

> 
> - export the function to re-set the legacy function in the 2.9 pseries
>   machine type definition.

Well, this is the part where I agree it's too much hassle.  :)

> 
> So, we need to add one argument to the function, export the function to
> use it from machine.c and at least spapr.c, to set the function in
> machine_class_init() and spapr_machine_2_9_class_options() (as we clear
> it in 2.10 function).
> 
> I can do that, but is this what you want?

I don't have a strong opinion. If you believe your way is
simpler, we can keep it.
Eduardo Habkost April 27, 2017, 8:28 p.m. UTC | #4
On Thu, Apr 27, 2017 at 12:12:58PM +0200, Laurent Vivier wrote:
> We need to change the way we distribute the memory across
> the nodes. To keep compatibility between machine type version
> introduce a  machine type dependent function.
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  include/hw/boards.h |  2 ++
>  numa.c              | 44 +++++++++++++++++++++++++++++++++-----------
>  2 files changed, 35 insertions(+), 11 deletions(-)
> 
> diff --git a/include/hw/boards.h b/include/hw/boards.h
> index 31d9c72..e571b22 100644
> --- a/include/hw/boards.h
> +++ b/include/hw/boards.h
> @@ -136,6 +136,8 @@ struct MachineClass {
>      int minimum_page_bits;
>      bool has_hotpluggable_cpus;
>      int numa_mem_align_shift;
> +    void (*numa_auto_assign_ram)(uint64_t *nodes,
> +                                 int nb_nodes, ram_addr_t size);
>  

The code looks good, but I would like to have comments here
documenting the behavior when the field is NULL.

>      HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
>                                             DeviceState *dev);
> diff --git a/numa.c b/numa.c
> index 6fc2393..2770c18 100644
> --- a/numa.c
> +++ b/numa.c
> @@ -294,6 +294,38 @@ static void validate_numa_cpus(void)
>      g_free(seen_cpus);
>  }
>  
> +static void numa_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
> +                                 int nb_nodes, ram_addr_t size)
> +{
> +    int i;
> +    uint64_t usedmem = 0;
> +
> +    if (mc->numa_auto_assign_ram) {
> +        uint64_t *mem = g_new(uint64_t, nb_nodes);
> +
> +        mc->numa_auto_assign_ram(mem, nb_nodes, size);
> +
> +        for (i = 0; i < nb_nodes; i++) {
> +            nodes[i].node_mem = mem[i];
> +        }
> +
> +        g_free(mem);
> +
> +        return;
> +    }
> +
> +    /* Align each node according to the alignment
> +     * requirements of the machine class
> +     */
> +
> +    for (i = 0; i < nb_nodes - 1; i++) {
> +        nodes[i].node_mem = (size / nb_nodes) &
> +                            ~((1 << mc->numa_mem_align_shift) - 1);
> +        usedmem += nodes[i].node_mem;
> +    }
> +    nodes[i].node_mem = size - usedmem;
> +}
> +
>  void parse_numa_opts(MachineClass *mc)
>  {
>      int i;
> @@ -336,17 +368,7 @@ void parse_numa_opts(MachineClass *mc)
>              }
>          }
>          if (i == nb_numa_nodes) {
> -            uint64_t usedmem = 0;
> -
> -            /* Align each node according to the alignment
> -             * requirements of the machine class
> -             */
> -            for (i = 0; i < nb_numa_nodes - 1; i++) {
> -                numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
> -                                        ~((1 << mc->numa_mem_align_shift) - 1);
> -                usedmem += numa_info[i].node_mem;
> -            }
> -            numa_info[i].node_mem = ram_size - usedmem;
> +            numa_auto_assign_ram(mc, numa_info, nb_numa_nodes, ram_size);
>          }
>  
>          numa_total = 0;
> -- 
> 2.9.3
>
diff mbox

Patch

diff --git a/include/hw/boards.h b/include/hw/boards.h
index 31d9c72..e571b22 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -136,6 +136,8 @@  struct MachineClass {
     int minimum_page_bits;
     bool has_hotpluggable_cpus;
     int numa_mem_align_shift;
+    void (*numa_auto_assign_ram)(uint64_t *nodes,
+                                 int nb_nodes, ram_addr_t size);
 
     HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
                                            DeviceState *dev);
diff --git a/numa.c b/numa.c
index 6fc2393..2770c18 100644
--- a/numa.c
+++ b/numa.c
@@ -294,6 +294,38 @@  static void validate_numa_cpus(void)
     g_free(seen_cpus);
 }
 
+static void numa_auto_assign_ram(MachineClass *mc, NodeInfo *nodes,
+                                 int nb_nodes, ram_addr_t size)
+{
+    int i;
+    uint64_t usedmem = 0;
+
+    if (mc->numa_auto_assign_ram) {
+        uint64_t *mem = g_new(uint64_t, nb_nodes);
+
+        mc->numa_auto_assign_ram(mem, nb_nodes, size);
+
+        for (i = 0; i < nb_nodes; i++) {
+            nodes[i].node_mem = mem[i];
+        }
+
+        g_free(mem);
+
+        return;
+    }
+
+    /* Align each node according to the alignment
+     * requirements of the machine class
+     */
+
+    for (i = 0; i < nb_nodes - 1; i++) {
+        nodes[i].node_mem = (size / nb_nodes) &
+                            ~((1 << mc->numa_mem_align_shift) - 1);
+        usedmem += nodes[i].node_mem;
+    }
+    nodes[i].node_mem = size - usedmem;
+}
+
 void parse_numa_opts(MachineClass *mc)
 {
     int i;
@@ -336,17 +368,7 @@  void parse_numa_opts(MachineClass *mc)
             }
         }
         if (i == nb_numa_nodes) {
-            uint64_t usedmem = 0;
-
-            /* Align each node according to the alignment
-             * requirements of the machine class
-             */
-            for (i = 0; i < nb_numa_nodes - 1; i++) {
-                numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
-                                        ~((1 << mc->numa_mem_align_shift) - 1);
-                usedmem += numa_info[i].node_mem;
-            }
-            numa_info[i].node_mem = ram_size - usedmem;
+            numa_auto_assign_ram(mc, numa_info, nb_numa_nodes, ram_size);
         }
 
         numa_total = 0;