diff mbox series

[v3,09/70] physmem: Introduce ram_block_convert_range() for page conversion

Message ID 20231115071519.2864957-10-xiaoyao.li@intel.com
State New
Headers show
Series QEMU Guest memfd + QEMU TDX support | expand

Commit Message

Xiaoyao Li Nov. 15, 2023, 7:14 a.m. UTC
It's used for discarding opposite memory after memory conversion, for
confidential guest.

When page is converted from shared to private, the original shared
memory can be discarded via ram_block_discard_range();

When page is converted from private to shared, the original private
memory is back'ed by guest_memfd. Introduce
ram_block_discard_guest_memfd_range() for discarding memory in
guest_memfd.

Originally-from: Isaku Yamahata <isaku.yamahata@intel.com>
Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
 include/exec/cpu-common.h |  2 ++
 system/physmem.c          | 50 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

Comments

Isaku Yamahata Nov. 17, 2023, 9:03 p.m. UTC | #1
On Wed, Nov 15, 2023 at 02:14:18AM -0500,
Xiaoyao Li <xiaoyao.li@intel.com> wrote:

> It's used for discarding opposite memory after memory conversion, for
> confidential guest.
> 
> When page is converted from shared to private, the original shared
> memory can be discarded via ram_block_discard_range();
> 
> When page is converted from private to shared, the original private
> memory is back'ed by guest_memfd. Introduce
> ram_block_discard_guest_memfd_range() for discarding memory in
> guest_memfd.
> 
> Originally-from: Isaku Yamahata <isaku.yamahata@intel.com>
> Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com>
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
>  include/exec/cpu-common.h |  2 ++
>  system/physmem.c          | 50 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 52 insertions(+)
> 
> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
> index 41115d891940..de728a18eef2 100644
> --- a/include/exec/cpu-common.h
> +++ b/include/exec/cpu-common.h
> @@ -175,6 +175,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
>  
>  int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
>  int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
> +                            bool shared_to_private);
>  
>  #endif
>  
> diff --git a/system/physmem.c b/system/physmem.c
> index ddfecddefcd6..cd6008fa09ad 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -3641,6 +3641,29 @@ err:
>      return ret;
>  }
>  
> +static int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
> +                                               size_t length)
> +{
> +    int ret = -1;
> +
> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> +    ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
> +                    start, length);
> +
> +    if (ret) {
> +        ret = -errno;
> +        error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
> +                     __func__, rb->idstr, start, length, ret);
> +    }
> +#else
> +    ret = -ENOSYS;
> +    error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)",
> +                 __func__, rb->idstr, start, length, ret);
> +#endif
> +
> +    return ret;
> +}
> +
>  bool ramblock_is_pmem(RAMBlock *rb)
>  {
>      return rb->flags & RAM_PMEM;
> @@ -3828,3 +3851,30 @@ bool ram_block_discard_is_required(void)
>      return qatomic_read(&ram_block_discard_required_cnt) ||
>             qatomic_read(&ram_block_coordinated_discard_required_cnt);
>  }
> +
> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
> +                            bool shared_to_private)
> +{
> +    if (!rb || rb->guest_memfd < 0) {
> +        return -1;
> +    }
> +
> +    if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
> +        !QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) {
> +        return -1;
> +    }
> +
> +    if (!length) {
> +        return -1;
> +    }
> +
> +    if (start + length > rb->max_length) {
> +        return -1;
> +    }
> +
> +    if (shared_to_private) {
> +        return ram_block_discard_range(rb, start, length);
> +    } else {
> +        return ram_block_discard_guest_memfd_range(rb, start, length);
> +    }
> +}

Originally this function issued KVM_SET_MEMORY_ATTRIBUTES, the function name
mad sense. But now it doesn't, and it issues only punch hole. We should rename
it to represent what it actually does. discard_range?
Xiaoyao Li Dec. 8, 2023, 7:59 a.m. UTC | #2
On 11/18/2023 5:03 AM, Isaku Yamahata wrote:
> On Wed, Nov 15, 2023 at 02:14:18AM -0500,
> Xiaoyao Li <xiaoyao.li@intel.com> wrote:
> 
>> It's used for discarding opposite memory after memory conversion, for
>> confidential guest.
>>
>> When page is converted from shared to private, the original shared
>> memory can be discarded via ram_block_discard_range();
>>
>> When page is converted from private to shared, the original private
>> memory is back'ed by guest_memfd. Introduce
>> ram_block_discard_guest_memfd_range() for discarding memory in
>> guest_memfd.
>>
>> Originally-from: Isaku Yamahata <isaku.yamahata@intel.com>
>> Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com>
>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>> ---
>>   include/exec/cpu-common.h |  2 ++
>>   system/physmem.c          | 50 +++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 52 insertions(+)
>>
>> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
>> index 41115d891940..de728a18eef2 100644
>> --- a/include/exec/cpu-common.h
>> +++ b/include/exec/cpu-common.h
>> @@ -175,6 +175,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
>>   
>>   int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
>>   int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
>> +                            bool shared_to_private);
>>   
>>   #endif
>>   
>> diff --git a/system/physmem.c b/system/physmem.c
>> index ddfecddefcd6..cd6008fa09ad 100644
>> --- a/system/physmem.c
>> +++ b/system/physmem.c
>> @@ -3641,6 +3641,29 @@ err:
>>       return ret;
>>   }
>>   
>> +static int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
>> +                                               size_t length)
>> +{
>> +    int ret = -1;
>> +
>> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
>> +    ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
>> +                    start, length);
>> +
>> +    if (ret) {
>> +        ret = -errno;
>> +        error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
>> +                     __func__, rb->idstr, start, length, ret);
>> +    }
>> +#else
>> +    ret = -ENOSYS;
>> +    error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)",
>> +                 __func__, rb->idstr, start, length, ret);
>> +#endif
>> +
>> +    return ret;
>> +}
>> +
>>   bool ramblock_is_pmem(RAMBlock *rb)
>>   {
>>       return rb->flags & RAM_PMEM;
>> @@ -3828,3 +3851,30 @@ bool ram_block_discard_is_required(void)
>>       return qatomic_read(&ram_block_discard_required_cnt) ||
>>              qatomic_read(&ram_block_coordinated_discard_required_cnt);
>>   }
>> +
>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
>> +                            bool shared_to_private)
>> +{
>> +    if (!rb || rb->guest_memfd < 0) {
>> +        return -1;
>> +    }
>> +
>> +    if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
>> +        !QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) {
>> +        return -1;
>> +    }
>> +
>> +    if (!length) {
>> +        return -1;
>> +    }
>> +
>> +    if (start + length > rb->max_length) {
>> +        return -1;
>> +    }
>> +
>> +    if (shared_to_private) {
>> +        return ram_block_discard_range(rb, start, length);
>> +    } else {
>> +        return ram_block_discard_guest_memfd_range(rb, start, length);
>> +    }
>> +}
> 
> Originally this function issued KVM_SET_MEMORY_ATTRIBUTES, the function name
> mad sense. But now it doesn't, and it issues only punch hole. We should rename
> it to represent what it actually does. discard_range?

ram_block_discard_range() already exists for non-guest-memfd memory discard.

I cannot come up with a proper name. e.g., 
ram_block_discard_opposite_range() while *opposite* seems unclear.

Do you have any better idea?
David Hildenbrand Dec. 8, 2023, 11:52 a.m. UTC | #3
On 08.12.23 08:59, Xiaoyao Li wrote:
> On 11/18/2023 5:03 AM, Isaku Yamahata wrote:
>> On Wed, Nov 15, 2023 at 02:14:18AM -0500,
>> Xiaoyao Li <xiaoyao.li@intel.com> wrote:
>>
>>> It's used for discarding opposite memory after memory conversion, for
>>> confidential guest.
>>>
>>> When page is converted from shared to private, the original shared
>>> memory can be discarded via ram_block_discard_range();
>>>
>>> When page is converted from private to shared, the original private
>>> memory is back'ed by guest_memfd. Introduce
>>> ram_block_discard_guest_memfd_range() for discarding memory in
>>> guest_memfd.
>>>
>>> Originally-from: Isaku Yamahata <isaku.yamahata@intel.com>
>>> Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>> ---
>>>    include/exec/cpu-common.h |  2 ++
>>>    system/physmem.c          | 50 +++++++++++++++++++++++++++++++++++++++
>>>    2 files changed, 52 insertions(+)
>>>
>>> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
>>> index 41115d891940..de728a18eef2 100644
>>> --- a/include/exec/cpu-common.h
>>> +++ b/include/exec/cpu-common.h
>>> @@ -175,6 +175,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
>>>    
>>>    int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
>>>    int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
>>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
>>> +                            bool shared_to_private);
>>>    
>>>    #endif
>>>    
>>> diff --git a/system/physmem.c b/system/physmem.c
>>> index ddfecddefcd6..cd6008fa09ad 100644
>>> --- a/system/physmem.c
>>> +++ b/system/physmem.c
>>> @@ -3641,6 +3641,29 @@ err:
>>>        return ret;
>>>    }
>>>    
>>> +static int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
>>> +                                               size_t length)
>>> +{
>>> +    int ret = -1;
>>> +
>>> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
>>> +    ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
>>> +                    start, length);
>>> +
>>> +    if (ret) {
>>> +        ret = -errno;
>>> +        error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
>>> +                     __func__, rb->idstr, start, length, ret);
>>> +    }
>>> +#else
>>> +    ret = -ENOSYS;
>>> +    error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)",
>>> +                 __func__, rb->idstr, start, length, ret);
>>> +#endif
>>> +
>>> +    return ret;
>>> +}
>>> +
>>>    bool ramblock_is_pmem(RAMBlock *rb)
>>>    {
>>>        return rb->flags & RAM_PMEM;
>>> @@ -3828,3 +3851,30 @@ bool ram_block_discard_is_required(void)
>>>        return qatomic_read(&ram_block_discard_required_cnt) ||
>>>               qatomic_read(&ram_block_coordinated_discard_required_cnt);
>>>    }
>>> +
>>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
>>> +                            bool shared_to_private)
>>> +{
>>> +    if (!rb || rb->guest_memfd < 0) {
>>> +        return -1;
>>> +    }
>>> +
>>> +    if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
>>> +        !QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) {
>>> +        return -1;
>>> +    }
>>> +
>>> +    if (!length) {
>>> +        return -1;
>>> +    }
>>> +
>>> +    if (start + length > rb->max_length) {
>>> +        return -1;
>>> +    }
>>> +
>>> +    if (shared_to_private) {
>>> +        return ram_block_discard_range(rb, start, length);
>>> +    } else {
>>> +        return ram_block_discard_guest_memfd_range(rb, start, length);
>>> +    }
>>> +}
>>
>> Originally this function issued KVM_SET_MEMORY_ATTRIBUTES, the function name
>> mad sense. But now it doesn't, and it issues only punch hole. We should rename
>> it to represent what it actually does. discard_range?
> 
> ram_block_discard_range() already exists for non-guest-memfd memory discard.
> 
> I cannot come up with a proper name. e.g.,
> ram_block_discard_opposite_range() while *opposite* seems unclear.
> 
> Do you have any better idea?

Having some indication that this is about "guest_memfd" back and forth 
switching/conversion will make sense. But I'm also not able to come up 
with a better name.

Maybe have two functions:

ram_block_activate_guest_memfd_range
ram_block_deactivate_guest_memfd_range
Xiaoyao Li Dec. 21, 2023, 6:18 a.m. UTC | #4
On 12/8/2023 7:52 PM, David Hildenbrand wrote:
> On 08.12.23 08:59, Xiaoyao Li wrote:
>> On 11/18/2023 5:03 AM, Isaku Yamahata wrote:
>>> On Wed, Nov 15, 2023 at 02:14:18AM -0500,
>>> Xiaoyao Li <xiaoyao.li@intel.com> wrote:
>>>
>>>> It's used for discarding opposite memory after memory conversion, for
>>>> confidential guest.
>>>>
>>>> When page is converted from shared to private, the original shared
>>>> memory can be discarded via ram_block_discard_range();
>>>>
>>>> When page is converted from private to shared, the original private
>>>> memory is back'ed by guest_memfd. Introduce
>>>> ram_block_discard_guest_memfd_range() for discarding memory in
>>>> guest_memfd.
>>>>
>>>> Originally-from: Isaku Yamahata <isaku.yamahata@intel.com>
>>>> Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>>> ---
>>>>    include/exec/cpu-common.h |  2 ++
>>>>    system/physmem.c          | 50 
>>>> +++++++++++++++++++++++++++++++++++++++
>>>>    2 files changed, 52 insertions(+)
>>>>
>>>> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
>>>> index 41115d891940..de728a18eef2 100644
>>>> --- a/include/exec/cpu-common.h
>>>> +++ b/include/exec/cpu-common.h
>>>> @@ -175,6 +175,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, 
>>>> void *opaque);
>>>>    int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
>>>>    int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t 
>>>> length);
>>>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t 
>>>> length,
>>>> +                            bool shared_to_private);
>>>>    #endif
>>>> diff --git a/system/physmem.c b/system/physmem.c
>>>> index ddfecddefcd6..cd6008fa09ad 100644
>>>> --- a/system/physmem.c
>>>> +++ b/system/physmem.c
>>>> @@ -3641,6 +3641,29 @@ err:
>>>>        return ret;
>>>>    }
>>>> +static int ram_block_discard_guest_memfd_range(RAMBlock *rb, 
>>>> uint64_t start,
>>>> +                                               size_t length)
>>>> +{
>>>> +    int ret = -1;
>>>> +
>>>> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
>>>> +    ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | 
>>>> FALLOC_FL_KEEP_SIZE,
>>>> +                    start, length);
>>>> +
>>>> +    if (ret) {
>>>> +        ret = -errno;
>>>> +        error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx 
>>>> (%d)",
>>>> +                     __func__, rb->idstr, start, length, ret);
>>>> +    }
>>>> +#else
>>>> +    ret = -ENOSYS;
>>>> +    error_report("%s: fallocate not available %s:%" PRIx64 " +%zx 
>>>> (%d)",
>>>> +                 __func__, rb->idstr, start, length, ret);
>>>> +#endif
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>>    bool ramblock_is_pmem(RAMBlock *rb)
>>>>    {
>>>>        return rb->flags & RAM_PMEM;
>>>> @@ -3828,3 +3851,30 @@ bool ram_block_discard_is_required(void)
>>>>        return qatomic_read(&ram_block_discard_required_cnt) ||
>>>>               
>>>> qatomic_read(&ram_block_coordinated_discard_required_cnt);
>>>>    }
>>>> +
>>>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t 
>>>> length,
>>>> +                            bool shared_to_private)
>>>> +{
>>>> +    if (!rb || rb->guest_memfd < 0) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
>>>> +        !QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    if (!length) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    if (start + length > rb->max_length) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    if (shared_to_private) {
>>>> +        return ram_block_discard_range(rb, start, length);
>>>> +    } else {
>>>> +        return ram_block_discard_guest_memfd_range(rb, start, length);
>>>> +    }
>>>> +}
>>>
>>> Originally this function issued KVM_SET_MEMORY_ATTRIBUTES, the 
>>> function name
>>> mad sense. But now it doesn't, and it issues only punch hole. We 
>>> should rename
>>> it to represent what it actually does. discard_range?
>>
>> ram_block_discard_range() already exists for non-guest-memfd memory 
>> discard.
>>
>> I cannot come up with a proper name. e.g.,
>> ram_block_discard_opposite_range() while *opposite* seems unclear.
>>
>> Do you have any better idea?
> 
> Having some indication that this is about "guest_memfd" back and forth 
> switching/conversion will make sense. But I'm also not able to come up 
> with a better name.
> 
> Maybe have two functions:
> 
> ram_block_activate_guest_memfd_range
> ram_block_deactivate_guest_memfd_range
> 

finally, I decide to drop this function and expose 
ram_block_discard_guest_memfd_range() instead. So caller can call the 
ram_block_discard_*() on its own.
diff mbox series

Patch

diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 41115d891940..de728a18eef2 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -175,6 +175,8 @@  typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
 
 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
 int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
+int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
+                            bool shared_to_private);
 
 #endif
 
diff --git a/system/physmem.c b/system/physmem.c
index ddfecddefcd6..cd6008fa09ad 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -3641,6 +3641,29 @@  err:
     return ret;
 }
 
+static int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
+                                               size_t length)
+{
+    int ret = -1;
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+    ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                    start, length);
+
+    if (ret) {
+        ret = -errno;
+        error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
+                     __func__, rb->idstr, start, length, ret);
+    }
+#else
+    ret = -ENOSYS;
+    error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)",
+                 __func__, rb->idstr, start, length, ret);
+#endif
+
+    return ret;
+}
+
 bool ramblock_is_pmem(RAMBlock *rb)
 {
     return rb->flags & RAM_PMEM;
@@ -3828,3 +3851,30 @@  bool ram_block_discard_is_required(void)
     return qatomic_read(&ram_block_discard_required_cnt) ||
            qatomic_read(&ram_block_coordinated_discard_required_cnt);
 }
+
+int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
+                            bool shared_to_private)
+{
+    if (!rb || rb->guest_memfd < 0) {
+        return -1;
+    }
+
+    if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
+        !QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) {
+        return -1;
+    }
+
+    if (!length) {
+        return -1;
+    }
+
+    if (start + length > rb->max_length) {
+        return -1;
+    }
+
+    if (shared_to_private) {
+        return ram_block_discard_range(rb, start, length);
+    } else {
+        return ram_block_discard_guest_memfd_range(rb, start, length);
+    }
+}