diff mbox series

KVM: PPC: Book3S HV: Fix conversion to gfn-based MMU notifier callbacks

Message ID 20210505121509.1470207-1-npiggin@gmail.com
State New
Headers show
Series KVM: PPC: Book3S HV: Fix conversion to gfn-based MMU notifier callbacks | expand

Commit Message

Nicholas Piggin May 5, 2021, 12:15 p.m. UTC
Commit b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier
callbacks") causes unmap_gfn_range and age_gfn callbacks to only work
on the first gfn in the range. It also makes the aging callbacks call
into both radix and hash aging functions for radix guests. Fix this.

Add warnings for the single-gfn calls that have been converted to range
callbacks, in case they ever receieve ranges greater than 1.

Fixes: b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier callbacks")
Reported-by: Bharata B Rao <bharata@linux.ibm.com>
Tested-by: Bharata B Rao <bharata@linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
The e500 change in that commit also looks suspicious, why is it okay
to remove kvm_flush_remote_tlbs() there? Also is the the change from
returning false to true intended?

Thanks,
Nick

 arch/powerpc/include/asm/kvm_book3s.h  |  2 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c    | 46 ++++++++++++++++++--------
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  5 ++-
 3 files changed, 36 insertions(+), 17 deletions(-)

Comments

Sean Christopherson May 5, 2021, 3:52 p.m. UTC | #1
On Wed, May 05, 2021, Nicholas Piggin wrote:
> Commit b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier
> callbacks") causes unmap_gfn_range and age_gfn callbacks to only work
> on the first gfn in the range. It also makes the aging callbacks call
> into both radix and hash aging functions for radix guests. Fix this.

Ugh, the rest of kvm_handle_hva_range() was so similar to the x86 code that I
glossed right over the for-loop.  My apologies :-/

> Add warnings for the single-gfn calls that have been converted to range
> callbacks, in case they ever receieve ranges greater than 1.
> 
> Fixes: b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier callbacks")
> Reported-by: Bharata B Rao <bharata@linux.ibm.com>
> Tested-by: Bharata B Rao <bharata@linux.ibm.com>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> The e500 change in that commit also looks suspicious, why is it okay
> to remove kvm_flush_remote_tlbs() there? Also is the the change from
> returning false to true intended?

The common code interprets a return of "true" as "do kvm_flush_remote_tlbs()".
There is technically a functional change, as the deferring the flush to common
code will batch flushes if the invalidation spans multiple memslots.  But the
mmu_lock is held the entire time, so batching is a good thing unless e500 has
wildly different MMU semantics.
Paolo Bonzini May 5, 2021, 4:02 p.m. UTC | #2
On 05/05/21 14:15, Nicholas Piggin wrote:
> Commit b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier
> callbacks") causes unmap_gfn_range and age_gfn callbacks to only work
> on the first gfn in the range. It also makes the aging callbacks call
> into both radix and hash aging functions for radix guests. Fix this.
> 
> Add warnings for the single-gfn calls that have been converted to range
> callbacks, in case they ever receieve ranges greater than 1.
> 
> Fixes: b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier callbacks")
> Reported-by: Bharata B Rao <bharata@linux.ibm.com>
> Tested-by: Bharata B Rao <bharata@linux.ibm.com>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Sorry for the breakage.  I queued this patch.

Paolo

> ---
> The e500 change in that commit also looks suspicious, why is it okay
> to remove kvm_flush_remote_tlbs() there? Also is the the change from
> returning false to true intended?
> 
> Thanks,
> Nick
> 
>   arch/powerpc/include/asm/kvm_book3s.h  |  2 +-
>   arch/powerpc/kvm/book3s_64_mmu_hv.c    | 46 ++++++++++++++++++--------
>   arch/powerpc/kvm/book3s_64_mmu_radix.c |  5 ++-
>   3 files changed, 36 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> index a6e9a5585e61..e6b53c6e21e3 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -210,7 +210,7 @@ extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd,
>   				      unsigned int lpid);
>   extern int kvmppc_radix_init(void);
>   extern void kvmppc_radix_exit(void);
> -extern bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
> +extern void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
>   			    unsigned long gfn);
>   extern bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
>   			  unsigned long gfn);
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index b7bd9ca040b8..2d9193cd73be 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -795,7 +795,7 @@ static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
>   	}
>   }
>   
> -static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
> +static void kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
>   			    unsigned long gfn)
>   {
>   	unsigned long i;
> @@ -829,15 +829,21 @@ static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
>   		unlock_rmap(rmapp);
>   		__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
>   	}
> -	return false;
>   }
>   
>   bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
>   {
> -	if (kvm_is_radix(kvm))
> -		return kvm_unmap_radix(kvm, range->slot, range->start);
> +	gfn_t gfn;
> +
> +	if (kvm_is_radix(kvm)) {
> +		for (gfn = range->start; gfn < range->end; gfn++)
> +			kvm_unmap_radix(kvm, range->slot, gfn);
> +	} else {
> +		for (gfn = range->start; gfn < range->end; gfn++)
> +			kvm_unmap_rmapp(kvm, range->slot, range->start);
> +	}
>   
> -	return kvm_unmap_rmapp(kvm, range->slot, range->start);
> +	return false;
>   }
>   
>   void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
> @@ -924,10 +930,18 @@ static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
>   
>   bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
>   {
> -	if (kvm_is_radix(kvm))
> -		kvm_age_radix(kvm, range->slot, range->start);
> +	gfn_t gfn;
> +	bool ret = false;
>   
> -	return kvm_age_rmapp(kvm, range->slot, range->start);
> +	if (kvm_is_radix(kvm)) {
> +		for (gfn = range->start; gfn < range->end; gfn++)
> +			ret |= kvm_age_radix(kvm, range->slot, gfn);
> +	} else {
> +		for (gfn = range->start; gfn < range->end; gfn++)
> +			ret |= kvm_age_rmapp(kvm, range->slot, gfn);
> +	}
> +
> +	return ret;
>   }
>   
>   static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
> @@ -965,18 +979,24 @@ static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
>   
>   bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
>   {
> -	if (kvm_is_radix(kvm))
> -		kvm_test_age_radix(kvm, range->slot, range->start);
> +	WARN_ON(range->start + 1 != range->end);
>   
> -	return kvm_test_age_rmapp(kvm, range->slot, range->start);
> +	if (kvm_is_radix(kvm))
> +		return kvm_test_age_radix(kvm, range->slot, range->start);
> +	else
> +		return kvm_test_age_rmapp(kvm, range->slot, range->start);
>   }
>   
>   bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
>   {
> +	WARN_ON(range->start + 1 != range->end);
> +
>   	if (kvm_is_radix(kvm))
> -		return kvm_unmap_radix(kvm, range->slot, range->start);
> +		kvm_unmap_radix(kvm, range->slot, range->start);
> +	else
> +		kvm_unmap_rmapp(kvm, range->slot, range->start);
>   
> -	return kvm_unmap_rmapp(kvm, range->slot, range->start);
> +	return false;
>   }
>   
>   static int vcpus_running(struct kvm *kvm)
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> index ec4f58fa9f5a..d909c069363e 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> @@ -993,7 +993,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
>   }
>   
>   /* Called with kvm->mmu_lock held */
> -bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
> +void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
>   		     unsigned long gfn)
>   {
>   	pte_t *ptep;
> @@ -1002,14 +1002,13 @@ bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
>   
>   	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) {
>   		uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT);
> -		return false;
> +		return;
>   	}
>   
>   	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
>   	if (ptep && pte_present(*ptep))
>   		kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
>   				 kvm->arch.lpid);
> -	return false;
>   }
>   
>   /* Called with kvm->mmu_lock held */
>
Nicholas Piggin May 6, 2021, 4:57 a.m. UTC | #3
Excerpts from Sean Christopherson's message of May 6, 2021 1:52 am:
> On Wed, May 05, 2021, Nicholas Piggin wrote:
>> Commit b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier
>> callbacks") causes unmap_gfn_range and age_gfn callbacks to only work
>> on the first gfn in the range. It also makes the aging callbacks call
>> into both radix and hash aging functions for radix guests. Fix this.
> 
> Ugh, the rest of kvm_handle_hva_range() was so similar to the x86 code that I
> glossed right over the for-loop.  My apologies :-/

No problem, we should have noticed it here in testing earlier too.

> 
>> Add warnings for the single-gfn calls that have been converted to range
>> callbacks, in case they ever receieve ranges greater than 1.
>> 
>> Fixes: b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier callbacks")
>> Reported-by: Bharata B Rao <bharata@linux.ibm.com>
>> Tested-by: Bharata B Rao <bharata@linux.ibm.com>
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> The e500 change in that commit also looks suspicious, why is it okay
>> to remove kvm_flush_remote_tlbs() there? Also is the the change from
>> returning false to true intended?
> 
> The common code interprets a return of "true" as "do kvm_flush_remote_tlbs()".
> There is technically a functional change, as the deferring the flush to common
> code will batch flushes if the invalidation spans multiple memslots.  But the
> mmu_lock is held the entire time, so batching is a good thing unless e500 has
> wildly different MMU semantics.

Ah okay that explains it. That sounds good, but I don't know the e500 
KVM code or have a way to test it myself.

Thanks,
Nick
Michael Ellerman May 6, 2021, 1:43 p.m. UTC | #4
Paolo Bonzini <pbonzini@redhat.com> writes:
> On 05/05/21 14:15, Nicholas Piggin wrote:
>> Commit b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier
>> callbacks") causes unmap_gfn_range and age_gfn callbacks to only work
>> on the first gfn in the range. It also makes the aging callbacks call
>> into both radix and hash aging functions for radix guests. Fix this.
>> 
>> Add warnings for the single-gfn calls that have been converted to range
>> callbacks, in case they ever receieve ranges greater than 1.
>> 
>> Fixes: b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier callbacks")
>> Reported-by: Bharata B Rao <bharata@linux.ibm.com>
>> Tested-by: Bharata B Rao <bharata@linux.ibm.com>
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>
> Sorry for the breakage.  I queued this patch.

Thanks. Are you planning to send it to Linus before rc1?

If not I can pick it up as I already have some things in my next and am
intending to send a pull request anyway.

cheers
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index a6e9a5585e61..e6b53c6e21e3 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -210,7 +210,7 @@  extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd,
 				      unsigned int lpid);
 extern int kvmppc_radix_init(void);
 extern void kvmppc_radix_exit(void);
-extern bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+extern void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			    unsigned long gfn);
 extern bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			  unsigned long gfn);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b7bd9ca040b8..2d9193cd73be 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -795,7 +795,7 @@  static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
 	}
 }
 
-static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
+static void kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			    unsigned long gfn)
 {
 	unsigned long i;
@@ -829,15 +829,21 @@  static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 		unlock_rmap(rmapp);
 		__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
 	}
-	return false;
 }
 
 bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	if (kvm_is_radix(kvm))
-		return kvm_unmap_radix(kvm, range->slot, range->start);
+	gfn_t gfn;
+
+	if (kvm_is_radix(kvm)) {
+		for (gfn = range->start; gfn < range->end; gfn++)
+			kvm_unmap_radix(kvm, range->slot, gfn);
+	} else {
+		for (gfn = range->start; gfn < range->end; gfn++)
+			kvm_unmap_rmapp(kvm, range->slot, range->start);
+	}
 
-	return kvm_unmap_rmapp(kvm, range->slot, range->start);
+	return false;
 }
 
 void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
@@ -924,10 +930,18 @@  static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	if (kvm_is_radix(kvm))
-		kvm_age_radix(kvm, range->slot, range->start);
+	gfn_t gfn;
+	bool ret = false;
 
-	return kvm_age_rmapp(kvm, range->slot, range->start);
+	if (kvm_is_radix(kvm)) {
+		for (gfn = range->start; gfn < range->end; gfn++)
+			ret |= kvm_age_radix(kvm, range->slot, gfn);
+	} else {
+		for (gfn = range->start; gfn < range->end; gfn++)
+			ret |= kvm_age_rmapp(kvm, range->slot, gfn);
+	}
+
+	return ret;
 }
 
 static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
@@ -965,18 +979,24 @@  static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	if (kvm_is_radix(kvm))
-		kvm_test_age_radix(kvm, range->slot, range->start);
+	WARN_ON(range->start + 1 != range->end);
 
-	return kvm_test_age_rmapp(kvm, range->slot, range->start);
+	if (kvm_is_radix(kvm))
+		return kvm_test_age_radix(kvm, range->slot, range->start);
+	else
+		return kvm_test_age_rmapp(kvm, range->slot, range->start);
 }
 
 bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
 {
+	WARN_ON(range->start + 1 != range->end);
+
 	if (kvm_is_radix(kvm))
-		return kvm_unmap_radix(kvm, range->slot, range->start);
+		kvm_unmap_radix(kvm, range->slot, range->start);
+	else
+		kvm_unmap_rmapp(kvm, range->slot, range->start);
 
-	return kvm_unmap_rmapp(kvm, range->slot, range->start);
+	return false;
 }
 
 static int vcpus_running(struct kvm *kvm)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index ec4f58fa9f5a..d909c069363e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -993,7 +993,7 @@  int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
 }
 
 /* Called with kvm->mmu_lock held */
-bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 		     unsigned long gfn)
 {
 	pte_t *ptep;
@@ -1002,14 +1002,13 @@  bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) {
 		uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT);
-		return false;
+		return;
 	}
 
 	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
 	if (ptep && pte_present(*ptep))
 		kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
 				 kvm->arch.lpid);
-	return false;
 }
 
 /* Called with kvm->mmu_lock held */