@@ -205,15 +205,10 @@ DESIGN REQUIREMENTS:
(Current solution)
-We have updated cputlb.c to defer operations when a cross-vCPU
-operation with async_run_on_cpu() which ensures each vCPU sees a
-coherent state when it next runs its work (in a few instructions
-time).
-
-A new set up operations (tlb_flush_*_all_cpus) take an additional flag
-which when set will force synchronisation by setting the source vCPUs
-work as "safe work" and exiting the cpu run loop. This ensure by the
-time execution restarts all flush operations have completed.
+A new set of tlb flush operations (tlb_flush_*_all_cpus_synced) force
+synchronisation by setting the source vCPUs work as "safe work" and
+exiting the cpu run loop. This ensures that by the time execution
+restarts all flush operations have completed.
TLB flag updates are all done atomically and are also protected by the
corresponding page lock.
@@ -66,24 +66,15 @@ void tlb_destroy(CPUState *cpu);
*/
void tlb_flush_page(CPUState *cpu, vaddr addr);
/**
- * tlb_flush_page_all_cpus:
+ * tlb_flush_page_all_cpus_synced:
* @cpu: src CPU of the flush
* @addr: virtual address of page to be flushed
*
- * Flush one page from the TLB of the specified CPU, for all
+ * Flush one page from the TLB of all CPUs, for all
* MMU indexes.
- */
-void tlb_flush_page_all_cpus(CPUState *src, vaddr addr);
-/**
- * tlb_flush_page_all_cpus_synced:
- * @cpu: src CPU of the flush
- * @addr: virtual address of page to be flushed
*
- * Flush one page from the TLB of the specified CPU, for all MMU
- * indexes like tlb_flush_page_all_cpus except the source vCPUs work
- * is scheduled as safe work meaning all flushes will be complete once
- * the source vCPUs safe work is complete. This will depend on when
- * the guests translation ends the TB.
+ * When this function returns, no CPUs will subsequently perform
+ * translations using the flushed TLBs.
*/
void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr);
/**
@@ -96,19 +87,14 @@ void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr);
* use one of the other functions for efficiency.
*/
void tlb_flush(CPUState *cpu);
-/**
- * tlb_flush_all_cpus:
- * @cpu: src CPU of the flush
- */
-void tlb_flush_all_cpus(CPUState *src_cpu);
/**
* tlb_flush_all_cpus_synced:
* @cpu: src CPU of the flush
*
- * Like tlb_flush_all_cpus except this except the source vCPUs work is
- * scheduled as safe work meaning all flushes will be complete once
- * the source vCPUs safe work is complete. This will depend on when
- * the guests translation ends the TB.
+ * Flush the entire TLB for all CPUs, for all MMU indexes.
+ *
+ * When this function returns, no CPUs will subsequently perform
+ * translations using the flushed TLBs.
*/
void tlb_flush_all_cpus_synced(CPUState *src_cpu);
/**
@@ -123,27 +109,16 @@ void tlb_flush_all_cpus_synced(CPUState *src_cpu);
void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr,
uint16_t idxmap);
/**
- * tlb_flush_page_by_mmuidx_all_cpus:
+ * tlb_flush_page_by_mmuidx_all_cpus_synced:
* @cpu: Originating CPU of the flush
* @addr: virtual address of page to be flushed
* @idxmap: bitmap of MMU indexes to flush
*
* Flush one page from the TLB of all CPUs, for the specified
* MMU indexes.
- */
-void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, vaddr addr,
- uint16_t idxmap);
-/**
- * tlb_flush_page_by_mmuidx_all_cpus_synced:
- * @cpu: Originating CPU of the flush
- * @addr: virtual address of page to be flushed
- * @idxmap: bitmap of MMU indexes to flush
*
- * Flush one page from the TLB of all CPUs, for the specified MMU
- * indexes like tlb_flush_page_by_mmuidx_all_cpus except the source
- * vCPUs work is scheduled as safe work meaning all flushes will be
- * complete once the source vCPUs safe work is complete. This will
- * depend on when the guests translation ends the TB.
+ * When this function returns, no CPUs will subsequently perform
+ * translations using the flushed TLBs.
*/
void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr,
uint16_t idxmap);
@@ -158,24 +133,15 @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr,
*/
void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap);
/**
- * tlb_flush_by_mmuidx_all_cpus:
+ * tlb_flush_by_mmuidx_all_cpus_synced:
* @cpu: Originating CPU of the flush
* @idxmap: bitmap of MMU indexes to flush
*
- * Flush all entries from all TLBs of all CPUs, for the specified
+ * Flush all entries from the TLB of all CPUs, for the specified
* MMU indexes.
- */
-void tlb_flush_by_mmuidx_all_cpus(CPUState *cpu, uint16_t idxmap);
-/**
- * tlb_flush_by_mmuidx_all_cpus_synced:
- * @cpu: Originating CPU of the flush
- * @idxmap: bitmap of MMU indexes to flush
*
- * Flush all entries from all TLBs of all CPUs, for the specified
- * MMU indexes like tlb_flush_by_mmuidx_all_cpus except except the source
- * vCPUs work is scheduled as safe work meaning all flushes will be
- * complete once the source vCPUs safe work is complete. This will
- * depend on when the guests translation ends the TB.
+ * When this function returns, no CPUs will subsequently perform
+ * translations using the flushed TLBs.
*/
void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap);
@@ -192,8 +158,6 @@ void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr,
uint16_t idxmap, unsigned bits);
/* Similarly, with broadcast and syncing. */
-void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu, vaddr addr,
- uint16_t idxmap, unsigned bits);
void tlb_flush_page_bits_by_mmuidx_all_cpus_synced
(CPUState *cpu, vaddr addr, uint16_t idxmap, unsigned bits);
@@ -213,9 +177,6 @@ void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
unsigned bits);
/* Similarly, with broadcast and syncing. */
-void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu, vaddr addr,
- vaddr len, uint16_t idxmap,
- unsigned bits);
void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
vaddr addr,
vaddr len,
@@ -288,18 +249,12 @@ static inline void tlb_destroy(CPUState *cpu)
static inline void tlb_flush_page(CPUState *cpu, vaddr addr)
{
}
-static inline void tlb_flush_page_all_cpus(CPUState *src, vaddr addr)
-{
-}
static inline void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr)
{
}
static inline void tlb_flush(CPUState *cpu)
{
}
-static inline void tlb_flush_all_cpus(CPUState *src_cpu)
-{
-}
static inline void tlb_flush_all_cpus_synced(CPUState *src_cpu)
{
}
@@ -311,20 +266,11 @@ static inline void tlb_flush_page_by_mmuidx(CPUState *cpu,
static inline void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
{
}
-static inline void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu,
- vaddr addr,
- uint16_t idxmap)
-{
-}
static inline void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu,
vaddr addr,
uint16_t idxmap)
{
}
-static inline void tlb_flush_by_mmuidx_all_cpus(CPUState *cpu, uint16_t idxmap)
-{
-}
-
static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
uint16_t idxmap)
{
@@ -335,12 +281,6 @@ static inline void tlb_flush_page_bits_by_mmuidx(CPUState *cpu,
unsigned bits)
{
}
-static inline void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *cpu,
- vaddr addr,
- uint16_t idxmap,
- unsigned bits)
-{
-}
static inline void
tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *cpu, vaddr addr,
uint16_t idxmap, unsigned bits)
@@ -351,13 +291,6 @@ static inline void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
unsigned bits)
{
}
-static inline void tlb_flush_range_by_mmuidx_all_cpus(CPUState *cpu,
- vaddr addr,
- vaddr len,
- uint16_t idxmap,
- unsigned bits)
-{
-}
static inline void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
vaddr addr,
vaddr len,
@@ -379,21 +379,6 @@ void tlb_flush(CPUState *cpu)
tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
}
-void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
-{
- const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
-
- tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
-
- flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
- fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
-}
-
-void tlb_flush_all_cpus(CPUState *src_cpu)
-{
- tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
-}
-
void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
{
const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
@@ -604,46 +589,6 @@ void tlb_flush_page(CPUState *cpu, vaddr addr)
tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
}
-void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, vaddr addr,
- uint16_t idxmap)
-{
- tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);
-
- /* This should already be page aligned */
- addr &= TARGET_PAGE_MASK;
-
- /*
- * Allocate memory to hold addr+idxmap only when needed.
- * See tlb_flush_page_by_mmuidx for details.
- */
- if (idxmap < TARGET_PAGE_SIZE) {
- flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
- RUN_ON_CPU_TARGET_PTR(addr | idxmap));
- } else {
- CPUState *dst_cpu;
-
- /* Allocate a separate data block for each destination cpu. */
- CPU_FOREACH(dst_cpu) {
- if (dst_cpu != src_cpu) {
- TLBFlushPageByMMUIdxData *d
- = g_new(TLBFlushPageByMMUIdxData, 1);
-
- d->addr = addr;
- d->idxmap = idxmap;
- async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
- RUN_ON_CPU_HOST_PTR(d));
- }
- }
- }
-
- tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
-}
-
-void tlb_flush_page_all_cpus(CPUState *src, vaddr addr)
-{
- tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
-}
-
void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
vaddr addr,
uint16_t idxmap)
@@ -835,54 +780,6 @@ void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr,
tlb_flush_range_by_mmuidx(cpu, addr, TARGET_PAGE_SIZE, idxmap, bits);
}
-void tlb_flush_range_by_mmuidx_all_cpus(CPUState *src_cpu,
- vaddr addr, vaddr len,
- uint16_t idxmap, unsigned bits)
-{
- TLBFlushRangeData d;
- CPUState *dst_cpu;
-
- /*
- * If all bits are significant, and len is small,
- * this devolves to tlb_flush_page.
- */
- if (bits >= TARGET_LONG_BITS && len <= TARGET_PAGE_SIZE) {
- tlb_flush_page_by_mmuidx_all_cpus(src_cpu, addr, idxmap);
- return;
- }
- /* If no page bits are significant, this devolves to tlb_flush. */
- if (bits < TARGET_PAGE_BITS) {
- tlb_flush_by_mmuidx_all_cpus(src_cpu, idxmap);
- return;
- }
-
- /* This should already be page aligned */
- d.addr = addr & TARGET_PAGE_MASK;
- d.len = len;
- d.idxmap = idxmap;
- d.bits = bits;
-
- /* Allocate a separate data block for each destination cpu. */
- CPU_FOREACH(dst_cpu) {
- if (dst_cpu != src_cpu) {
- TLBFlushRangeData *p = g_memdup(&d, sizeof(d));
- async_run_on_cpu(dst_cpu,
- tlb_flush_range_by_mmuidx_async_1,
- RUN_ON_CPU_HOST_PTR(p));
- }
- }
-
- tlb_flush_range_by_mmuidx_async_0(src_cpu, d);
-}
-
-void tlb_flush_page_bits_by_mmuidx_all_cpus(CPUState *src_cpu,
- vaddr addr, uint16_t idxmap,
- unsigned bits)
-{
- tlb_flush_range_by_mmuidx_all_cpus(src_cpu, addr, TARGET_PAGE_SIZE,
- idxmap, bits);
-}
-
void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
vaddr addr,
vaddr len,
These are no longer used. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> --- docs/devel/multi-thread-tcg.rst | 13 ++-- include/exec/exec-all.h | 97 +++++------------------------- accel/tcg/cputlb.c | 103 -------------------------------- 3 files changed, 19 insertions(+), 194 deletions(-)