@@ -27,4 +27,11 @@
void tlb_protect_code(ram_addr_t ram_addr);
void tlb_unprotect_code(ram_addr_t ram_addr);
#endif
+
+void cpu_set_llsc_prot(CPUState *cpu, ram_addr_t addr);
+bool cpu_resolve_llsc_begin(CPUState *cpu);
+void cpu_resolve_llsc_abort(CPUState *cpu);
+bool cpu_resolve_llsc_check(CPUState *cpu, ram_addr_t addr);
+void cpu_resolve_llsc_success(CPUState *cpu);
+
#endif
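
For reference, a minimal sketch of how a target's store-conditional helper
might drive this interface. The helper name, the mmu_idx plumbing, the 8-byte
access size and the return convention are illustrative assumptions rather than
part of this patch, and a real helper would need extra care for MMIO and
watchpoint pages:

uint64_t helper_store_conditional(CPUArchState *env, target_ulong vaddr,
                                  uint64_t val, int mmu_idx)
{
    CPUState *cs = env_cpu(env);
    void *host;
    int flags;

    if (!cpu_resolve_llsc_begin(cs)) {
        return 1;   /* reservation already lost: fail the SC */
    }

    /* Non-faulting probe: we must not longjmp out with the mutex held. */
    flags = probe_access_flags(env, vaddr, 8, MMU_DATA_STORE, mmu_idx,
                               true, &host, 0);
    if ((flags & TLB_INVALID_MASK) || host == NULL) {
        cpu_resolve_llsc_abort(cs);
        return 1;   /* or do a full, faulting probe outside the lock and retry */
    }

    if (!cpu_resolve_llsc_check(cs, qemu_ram_addr_from_host_nofail(host))) {
        return 1;   /* another agent modified the protected block */
    }

    stq_p(host, val);   /* perform the store while still holding the mutex */
    cpu_resolve_llsc_success(cs);
    return 0;
}
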
@@ -147,7 +147,13 @@ static inline void qemu_ram_block_writeback(RAMBlock *block)
}
#define DIRTY_CLIENTS_ALL ((1 << DIRTY_MEMORY_NUM) - 1)
-#define DIRTY_CLIENTS_NOCODE (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))
+
+/*
+ * Migration and VGA just want to see the dirty bit set once, hence "oneshot",
+ * as opposed to code and llsc_prot, which want to be called on every store.
+ */
+#define DIRTY_CLIENTS_ONESHOT ((1 << DIRTY_MEMORY_VGA) | \
+ (1 << DIRTY_MEMORY_MIGRATION))
static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
ram_addr_t length,
@@ -240,7 +246,13 @@ static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
bool migration =
cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
- return !(vga && code && migration);
+ bool llsc_prot =
+#ifdef TARGET_HAS_LLSC_PROT
+ cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_LLSC_PROT);
+#else
+ false;
+#endif
+ return !(vga && code && migration && llsc_prot);
}
static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
@@ -261,6 +273,12 @@ static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
!cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
ret |= (1 << DIRTY_MEMORY_MIGRATION);
}
+#ifdef TARGET_HAS_LLSC_PROT
+ if (mask & (1 << DIRTY_MEMORY_LLSC_PROT) &&
+ !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_LLSC_PROT)) {
+ ret |= (1 << DIRTY_MEMORY_LLSC_PROT);
+ }
+#endif
return ret;
}
@@ -322,6 +340,12 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
offset, next - page);
}
+#ifdef TARGET_HAS_LLSC_PROT
+ if (unlikely(mask & (1 << DIRTY_MEMORY_LLSC_PROT))) {
+ bitmap_set_atomic(blocks[DIRTY_MEMORY_LLSC_PROT]->blocks[idx],
+ offset, next - page);
+ }
+#endif
page = next;
idx++;
@@ -355,6 +379,8 @@ uint64_t cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
unsigned long hpratio = qemu_real_host_page_size() / TARGET_PAGE_SIZE;
unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
+ assert(0);
+
/* start address is aligned at the start of a word? */
if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
(hpratio == 1)) {
@@ -396,6 +422,12 @@ uint64_t cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
if (tcg_enabled()) {
qatomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
temp);
+#ifdef TARGET_HAS_LLSC_PROT
+ /* XXX? */
+ qatomic_or(&blocks[DIRTY_MEMORY_LLSC_PROT][idx][offset],
+ temp);
+#endif
+ assert(0);
}
}
@@ -408,7 +440,8 @@ uint64_t cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
} else {
- uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;
+ uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL :
+ DIRTY_CLIENTS_ONESHOT;
if (!global_dirty_tracking) {
clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
@@ -470,6 +503,9 @@ static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
+#ifdef TARGET_HAS_LLSC_PROT
+ cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_LLSC_PROT);
+#endif
}
@@ -8,10 +8,20 @@
typedef struct RAMBlockNotifier RAMBlockNotifier;
+/* XXX: hack, need to work out how to do this */
+#ifndef TARGET_HAS_LLSC_PROT
+#define TARGET_HAS_LLSC_PROT
+#endif
+
#define DIRTY_MEMORY_VGA 0
#define DIRTY_MEMORY_CODE 1
#define DIRTY_MEMORY_MIGRATION 2
+#ifdef TARGET_HAS_LLSC_PROT
+#define DIRTY_MEMORY_LLSC_PROT 3
+#define DIRTY_MEMORY_NUM 4 /* num of dirty bits */
+#else
#define DIRTY_MEMORY_NUM 3 /* num of dirty bits */
+#endif
/* The dirty memory bitmap is split into fixed-size blocks to allow growth
* under RCU. The bitmap for a block can be accessed as follows:
@@ -489,6 +489,11 @@ struct CPUState {
uint64_t random_seed;
sigjmp_buf jmp_env;
+    int llsc_prot_block_size;   /* reservation granule size, in bytes */
+    bool llsc_prot_active;
+    bool llsc_resolving;
+    hwaddr llsc_prot_address;   /* ram_addr_t of the reservation, -1ULL if none */
+
QemuMutex work_mutex;
QSIMPLEQ_HEAD(, qemu_work_item) work_list;
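
The reservation state lives in CPUState rather than in the arch-specific state
so the common cputlb code can manipulate it. A target opting in would be
expected to set llsc_prot_block_size when the CPU is created; for example
(hypothetical, not part of this patch):

static void foo_cpu_init_llsc(CPUState *cs)
{
    /* Stores within this many bytes of the reservation invalidate it. */
    cs->llsc_prot_block_size = 16;
}
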
@@ -274,11 +274,24 @@ static inline void tlb_n_used_entries_dec(CPUState *cpu, uintptr_t mmu_idx)
cpu->neg.tlb.d[mmu_idx].n_used_entries--;
}
+#ifdef TARGET_HAS_LLSC_PROT
+static QemuMutex llsc_prot_mutex;
+#endif
+
void tlb_init(CPUState *cpu)
{
int64_t now = get_clock_realtime();
int i;
+#ifdef TARGET_HAS_LLSC_PROT
+    /* The llsc_prot mutex is global, so only initialize it once. */
+    static gsize llsc_prot_mutex_once;
+
+    if (g_once_init_enter(&llsc_prot_mutex_once)) {
+        qemu_mutex_init(&llsc_prot_mutex);
+        g_once_init_leave(&llsc_prot_mutex_once, 1);
+    }
+#endif
qemu_spin_init(&cpu->neg.tlb.c.lock);
/* All tlbs are initialized flushed. */
@@ -1408,6 +1415,179 @@ static bool victim_tlb_hit(CPUState *cpu, size_t mmu_idx, size_t index,
return false;
}
+#ifdef TARGET_HAS_LLSC_PROT
+/*
+ * Remove a particular address from being watched by LLSC if it is
+ * no longer actively protected by any CPU.
+ */
+static void cpu_teardown_llsc_prot(ram_addr_t addr)
+{
+ CPUState *cpu;
+
+ addr &= TARGET_PAGE_MASK;
+
+ CPU_FOREACH(cpu) {
+ if ((cpu->llsc_prot_address & TARGET_PAGE_MASK) == addr) {
+ if (cpu->llsc_prot_active) {
+ return;
+ }
+ cpu->llsc_prot_address = -1ULL;
+ }
+ }
+ cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_LLSC_PROT);
+}
+
+/*
+ * Start protection of an address. cpu_resolve_llsc_begin() and
+ * cpu_resolve_llsc_check() will return true so long as no other
+ * agent has modified the protected block.
+ */
+void cpu_set_llsc_prot(CPUState *cpu, ram_addr_t addr)
+{
+ ram_addr_t old;
+
+ addr &= ~(cpu->llsc_prot_block_size - 1);
+
+ qemu_mutex_lock(&llsc_prot_mutex);
+ old = cpu->llsc_prot_address;
+ if ((addr & TARGET_PAGE_MASK) == (old & TARGET_PAGE_MASK)) {
+ old = -1ULL;
+ goto out;
+ }
+ cpu_physical_memory_test_and_clear_dirty(addr & TARGET_PAGE_MASK,
+ TARGET_PAGE_SIZE,
+ DIRTY_MEMORY_LLSC_PROT);
+out:
+ cpu->llsc_prot_address = addr;
+ cpu->llsc_prot_active = true;
+ if (old != -1ULL) {
+ cpu_teardown_llsc_prot(old);
+ }
+
+ qemu_mutex_unlock(&llsc_prot_mutex);
+}
+
+/*
+ * The store for store-conditional must be performed under the llsc_prot_mutex,
+ * and it must not go ahead if protection has been lost. Conflicts are resolved
+ * at TLB probe time, so this can be achieved by taking the lock here, checking
+ * that our protection has not been invalidated, probing the TLB, and then
+ * performing the store.
+ *
+ * The TLB probe must be non-faulting (to avoid problems with lock recursion).
+ * If the non-faulting probe fails then cpu_resolve_llsc_abort() must be
+ * called; the caller may then either perform a full probe and retry, or
+ * simply fail the store-conditional.
+ *
+ * The TLB probe may have to check and invalidate the protection on other
+ * CPUs, so it must be performed while holding the lock. If llsc_resolving
+ * is true, the lock is already held and is not acquired again.
+ */
+bool cpu_resolve_llsc_begin(CPUState *cpu)
+{
+ if (!cpu->llsc_prot_active) {
+ return false;
+ }
+ qemu_mutex_lock(&llsc_prot_mutex);
+ if (!cpu->llsc_prot_active) {
+ qemu_mutex_unlock(&llsc_prot_mutex);
+ return false;
+ }
+
+ g_assert(!cpu->llsc_resolving);
+ cpu->llsc_resolving = true;
+
+ return true;
+}
+
+void cpu_resolve_llsc_abort(CPUState *cpu)
+{
+ cpu->llsc_resolving = false;
+ qemu_mutex_unlock(&llsc_prot_mutex);
+}
+
+bool cpu_resolve_llsc_check(CPUState *cpu, ram_addr_t addr)
+{
+ g_assert(cpu->llsc_resolving);
+ g_assert(cpu->llsc_prot_active);
+
+ addr &= ~(cpu->llsc_prot_block_size - 1);
+ if (cpu->llsc_prot_address != addr) {
+ cpu->llsc_prot_active = false;
+ cpu->llsc_resolving = false;
+ qemu_mutex_unlock(&llsc_prot_mutex);
+ return false;
+ }
+
+ return true;
+}
+
+void cpu_resolve_llsc_success(CPUState *cpu)
+{
+ ram_addr_t addr;
+
+ addr = cpu->llsc_prot_address;
+ g_assert(addr != -1ULL);
+ assert(cpu->llsc_prot_active);
+ cpu->llsc_prot_active = false;
+
+    /*
+     * Leave the llsc_prot_address under active watch, for performance.
+     * It could instead be released here with:
+     *     cpu->llsc_prot_address = -1ULL;
+     *     cpu_teardown_llsc_prot(addr);
+     */
+ g_assert(cpu->llsc_resolving);
+ cpu->llsc_resolving = false;
+
+ qemu_mutex_unlock(&llsc_prot_mutex);
+}
+
+static void other_cpus_clear_llsc_prot(CPUState *cpu, ram_addr_t addr,
+ unsigned size)
+{
+ CPUState *c;
+ ram_addr_t end;
+ bool teardown = false;
+
+ end = (addr + size - 1) & ~(cpu->llsc_prot_block_size - 1);
+ addr &= ~(cpu->llsc_prot_block_size - 1);
+
+ if (!cpu->llsc_resolving) {
+ qemu_mutex_lock(&llsc_prot_mutex);
+ }
+
+ CPU_FOREACH(c) {
+ ram_addr_t a = c->llsc_prot_address;
+
+ if (c == cpu) {
+ continue;
+ }
+ if (a == -1ULL) {
+ assert(!c->llsc_prot_active);
+ continue;
+ }
+ if (a == addr || a == end) {
+ if (c->llsc_prot_active) {
+ c->llsc_prot_active = false;
+ } else {
+ teardown = true;
+ }
+ }
+ }
+ if (teardown) {
+ cpu_teardown_llsc_prot(addr);
+ if (end != addr) {
+ cpu_teardown_llsc_prot(end);
+ }
+ }
+
+ if (!cpu->llsc_resolving) {
+ qemu_mutex_unlock(&llsc_prot_mutex);
+ }
+}
+#endif
+
static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
CPUTLBEntryFull *full, uintptr_t retaddr)
{
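
The load-locked side only needs cpu_set_llsc_prot(). A sketch of what a
target's LL helper might look like (illustrative as well; probe_read() can
return NULL for pages that are not backed by directly accessible RAM, which a
real helper must handle):

uint64_t helper_load_locked(CPUArchState *env, target_ulong vaddr, int mmu_idx)
{
    CPUState *cs = env_cpu(env);
    void *host = probe_read(env, vaddr, 8, mmu_idx, GETPC());

    /*
     * Arm the reservation before reading, so a store from another CPU
     * between the read and the reservation cannot go unnoticed.
     */
    cpu_set_llsc_prot(cs, qemu_ram_addr_from_host_nofail(host));
    return ldq_p(host);
}
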
@@ -1419,11 +1596,18 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
tb_invalidate_phys_range_fast(ram_addr, size, retaddr);
}
+#ifdef TARGET_HAS_LLSC_PROT
+ if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_LLSC_PROT)) {
+ other_cpus_clear_llsc_prot(cpu, ram_addr, size);
+ }
+#endif
+
/*
* Set both VGA and migration bits for simplicity and to remove
- * the notdirty callback faster.
+ * the notdirty callback faster. Code and llsc_prot don't get set
+ * because we always want callbacks for them.
*/
- cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
+ cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_ONESHOT);
/* We remove the notdirty callback only if the code has been flushed. */
if (!cpu_physical_memory_is_clean(ram_addr)) {
@@ -250,6 +250,11 @@ static void cpu_common_initfn(Object *obj)
cpu->nr_cores = 1;
cpu->nr_threads = 1;
cpu->cflags_next_tb = -1;
+#ifdef TARGET_HAS_LLSC_PROT
+ cpu->llsc_prot_active = false;
+ cpu->llsc_prot_address = -1ULL;
+ cpu->llsc_resolving = false;
+#endif
qemu_mutex_init(&cpu->work_mutex);
qemu_lockcnt_init(&cpu->in_ioctl_lock);
@@ -1174,7 +1174,7 @@ int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) {
cpu_physical_memory_set_dirty_range(ram_addr, size,
tcg_enabled() ? DIRTY_CLIENTS_ALL :
- DIRTY_CLIENTS_NOCODE);
+ DIRTY_CLIENTS_ONESHOT);
return 0;
}
@@ -520,6 +520,7 @@ static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)
*/
handled_dirty = (1 << DIRTY_MEMORY_MIGRATION) |
(1 << DIRTY_MEMORY_CODE);
+ /* XXX: llsc? */
if (dirty_mask & ~handled_dirty) {
trace_vhost_reject_section(mr->name, 1);
@@ -1863,6 +1863,9 @@ uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
if (tcg_enabled() && rb) {
/* TCG only cares about dirty memory logging for RAM, not IOMMU. */
mask |= (1 << DIRTY_MEMORY_CODE);
+#ifdef TARGET_HAS_LLSC_PROT
+ mask |= (1 << DIRTY_MEMORY_LLSC_PROT);
+#endif
}
return mask;
}
@@ -2591,6 +2591,13 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
dirty_log_mask =
cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
}
+#ifdef TARGET_HAS_LLSC_PROT
+ if (dirty_log_mask & (1 << DIRTY_MEMORY_LLSC_PROT)) {
+ assert(tcg_enabled());
+ /* XXX should invalidate CPU's llsc_prot protections here? */
+ dirty_log_mask &= ~(1 << DIRTY_MEMORY_LLSC_PROT);
+ }
+#endif
if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
assert(tcg_enabled());
tb_invalidate_phys_range(addr, addr + length - 1);
@@ -286,7 +286,8 @@ void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL,
stl_p(ptr, val);
dirty_log_mask = memory_region_get_dirty_log_mask(mr);
- dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
+    /* Keep the code and llsc_prot callbacks; only set the oneshot clients. */
+    dirty_log_mask &= DIRTY_CLIENTS_ONESHOT;
cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
4, dirty_log_mask);
r = MEMTX_OK;