Message ID: 1436516626-8322-14-git-send-email-a.rigo@virtualopensystems.com
State: New
Alvise Rigo <a.rigo@virtualopensystems.com> writes:

> Exploiting the tcg_excl_access_lock, port the helper_{le,be}_st_name to
> work in real multithreading.
>
> - The macro lookup_cpus_ll_addr now uses directly the
>   env->excl_protected_addr to invalidate others' LL/SC operations
>
> Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
> ---
>  softmmu_template.h | 110 +++++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 89 insertions(+), 21 deletions(-)
>
> diff --git a/softmmu_template.h b/softmmu_template.h
> index bc767f6..522454f 100644
> --- a/softmmu_template.h
> +++ b/softmmu_template.h
> @@ -141,21 +141,24 @@
>              vidx >= 0;                                                    \
>          })
>
> +#define EXCLUSIVE_RESET_ADDR ULLONG_MAX
> +
> +/* This macro requires the caller to have the tcg_excl_access_lock lock since
> + * it modifies the excl_protected_hwaddr of a running vCPU.
> + * The macros scans all the excl_protected_hwaddr of all the vCPUs and compare
> + * them with the address the current vCPU is writing to. If there is a match,
> + * we reset the value, making the SC fail. */

It would have been nice if we had started with a comment when the
function^H^H^H^H^H macro was first introduced and then updated here.

>  #define lookup_cpus_ll_addr(addr)                                         \
>  ({                                                                        \
>      CPUState *cpu;                                                        \
>      CPUArchState *acpu;                                                   \
> -    bool hit = false;                                                     \
>                                                                            \
>      CPU_FOREACH(cpu) {                                                    \
>          acpu = (CPUArchState *)cpu->env_ptr;                              \
>          if (cpu != current_cpu && acpu->excl_protected_hwaddr == addr) {  \
> -            hit = true;                                                   \
> -            break;                                                        \
> +            acpu->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;           \
>          }                                                                 \
>      }                                                                     \
> -                                                                          \
> -    hit;                                                                  \
>  })

My comment about using an inline function in the earlier patch stands.

>
>  #ifndef SOFTMMU_CODE_ACCESS
> @@ -439,18 +442,52 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>           * exclusive-protected memory. */
>          hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
>
> -        bool set_to_dirty;
> -
>          /* Two cases of invalidation: the current vCPU is writing to another
>           * vCPU's exclusive address or the vCPU that issued the LoadLink is
>           * writing to it, but not through a StoreCond. */
> -        set_to_dirty = lookup_cpus_ll_addr(hw_addr);
> -        set_to_dirty |= env->ll_sc_context &&
> -                        (env->excl_protected_hwaddr == hw_addr);
> +        qemu_mutex_lock(&tcg_excl_access_lock);
> +
> +        /* The macro lookup_cpus_ll_addr could have reset the exclusive
> +         * address. Fail the SC in this case.
> +         * N.B.: Here excl_succeeded == 0 means that we don't come from a
> +         * store conditional. */
> +        if (env->excl_succeeded &&
> +            (env->excl_protected_hwaddr == EXCLUSIVE_RESET_ADDR)) {
> +            env->excl_succeeded = 0;
> +            qemu_mutex_unlock(&tcg_excl_access_lock);
> +
> +            return;
> +        }
> +
> +        lookup_cpus_ll_addr(hw_addr);

Add a comment for the side effect. I'm also confused by the above comment,
given that we call lookup_cpus_ll_addr() after the comment about it
tweaking excl_succeeded.

> +
> +        if (!env->excl_succeeded) {
> +            if (env->ll_sc_context &&
> +                (env->excl_protected_hwaddr == hw_addr)) {
> +                cpu_physical_memory_set_excl_dirty(hw_addr);
> +            }
> +        } else {
> +            if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
> +                env->excl_protected_hwaddr != hw_addr) {
> +                env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
> +                qemu_mutex_unlock(&tcg_excl_access_lock);
> +                env->excl_succeeded = 0;
> +
> +                return;
> +            }
> +        }

I'm wondering if it can be more naturally written the other way round to
aid comprehension:

    if (env->excl_succeeded) {
        if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
            env->excl_protected_hwaddr != hw_addr) {
            ..do stuff..
            return
        }
    } else {
        if (env->ll_sc_context &&
            (env->excl_protected_hwaddr == hw_addr)) {
            cpu_physical_memory_set_excl_dirty(hw_addr);
        }
    }

Although now I'm confused as to why we push on in 3 of the 4 cases.

> +
> +        haddr = addr + env->tlb_table[mmu_idx][index].addend;
> +    #if DATA_SIZE == 1
> +        glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
> +    #else
> +        glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
> +    #endif
> +
> +        env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
> +        qemu_mutex_unlock(&tcg_excl_access_lock);
>
> -        if (set_to_dirty) {
> -            cpu_physical_memory_set_excl_dirty(hw_addr);
> -        } /* the vCPU is legitimately writing to the protected address */
> +        return;
>      } else {
>          if ((addr & (DATA_SIZE - 1)) != 0) {
>              goto do_unaligned_access;
> @@ -537,18 +574,49 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>           * exclusive-protected memory. */
>          hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
>
> -        bool set_to_dirty;
> -
>          /* Two cases of invalidation: the current vCPU is writing to another
>           * vCPU's exclusive address or the vCPU that issued the LoadLink is
>           * writing to it, but not through a StoreCond. */
> -        set_to_dirty = lookup_cpus_ll_addr(hw_addr);
> -        set_to_dirty |= env->ll_sc_context &&
> -                        (env->excl_protected_hwaddr == hw_addr);
> +        qemu_mutex_lock(&tcg_excl_access_lock);
> +
> +        /* The macro lookup_cpus_ll_addr could have reset the exclusive
> +         * address. Fail the SC in this case.
> +         * N.B.: Here excl_succeeded == 0 means that we don't come from a
> +         * store conditional. */
> +        if (env->excl_succeeded &&
> +            (env->excl_protected_hwaddr == EXCLUSIVE_RESET_ADDR)) {
> +            env->excl_succeeded = 0;
> +            qemu_mutex_unlock(&tcg_excl_access_lock);
> +
> +            return;
> +        }
> +
> +        lookup_cpus_ll_addr(hw_addr);
> +
> +        if (!env->excl_succeeded) {
> +            if (env->ll_sc_context &&
> +                (env->excl_protected_hwaddr == hw_addr)) {
> +                cpu_physical_memory_set_excl_dirty(hw_addr);
> +            }
> +        } else {
> +            if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
> +                env->excl_protected_hwaddr != hw_addr) {
> +                env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
> +                qemu_mutex_unlock(&tcg_excl_access_lock);
> +                env->excl_succeeded = 0;
> +
> +                return;
> +            }
> +        }

Given the amount of copy/paste between the two functions I wonder if
there is some commonality to be re-factored out here?

>
> -        if (set_to_dirty) {
> -            cpu_physical_memory_set_excl_dirty(hw_addr);
> -        } /* the vCPU is legitimately writing to the protected address */
> +        haddr = addr + env->tlb_table[mmu_idx][index].addend;
> +
> +        glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
> +
> +        qemu_mutex_unlock(&tcg_excl_access_lock);
> +        env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
> +
> +        return;
>      } else {
>          if ((addr & (DATA_SIZE - 1)) != 0) {
>              goto do_unaligned_access;
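For reference, the inline function Alex is asking for could look like the
sketch below. The body is the macro from this patch unchanged; only the
function wrapper and its name are new (and the name is hypothetical):

    /* Sketch: the lookup macro as an inline function (name is hypothetical).
     * The caller must hold tcg_excl_access_lock, since this writes the
     * excl_protected_hwaddr of other running vCPUs. */
    static inline void reset_other_cpus_ll_addr(hwaddr addr)
    {
        CPUState *cpu;
        CPUArchState *acpu;

        CPU_FOREACH(cpu) {
            acpu = (CPUArchState *)cpu->env_ptr;
            if (cpu != current_cpu && acpu->excl_protected_hwaddr == addr) {
                /* Invalidate the other vCPU's LL/SC pairing: its SC fails. */
                acpu->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
            }
        }
    }

The macro form is only needed because softmmu_template.h is included once
per access size; a function version would live in a normally compiled file
such as cputlb.c and gain type checking in the bargain.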
On Fri, Jul 17, 2015 at 5:57 PM, Alex Bennée <alex.bennee@linaro.org> wrote:
>
> Alvise Rigo <a.rigo@virtualopensystems.com> writes:
>
>> Exploiting the tcg_excl_access_lock, port the helper_{le,be}_st_name to
>> work in real multithreading.
>>
>> - The macro lookup_cpus_ll_addr now uses directly the
>>   env->excl_protected_addr to invalidate others' LL/SC operations
>>
>> Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
>> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
>> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
>> ---
>>  softmmu_template.h | 110 +++++++++++++++++++++++++++++++++++++++++++----------
>>  1 file changed, 89 insertions(+), 21 deletions(-)
>>
>> diff --git a/softmmu_template.h b/softmmu_template.h
>> index bc767f6..522454f 100644
>> --- a/softmmu_template.h
>> +++ b/softmmu_template.h
>> @@ -141,21 +141,24 @@
>>              vidx >= 0;                                                    \
>>          })
>>
>> +#define EXCLUSIVE_RESET_ADDR ULLONG_MAX
>> +
>> +/* This macro requires the caller to have the tcg_excl_access_lock lock since
>> + * it modifies the excl_protected_hwaddr of a running vCPU.
>> + * The macros scans all the excl_protected_hwaddr of all the vCPUs and compare
>> + * them with the address the current vCPU is writing to. If there is a match,
>> + * we reset the value, making the SC fail. */
>
> It would have been nice if we had started with a comment when the
> function^H^H^H^H^H macro was first introduced and then updated here.

OK. Related to this, I'm thinking of refactoring the patches so as to drop
the "move to multithreading" part, since it makes things more confusing
(even if my intent was the opposite).

>
>>  #define lookup_cpus_ll_addr(addr)                                         \
>>  ({                                                                        \
>>      CPUState *cpu;                                                        \
>>      CPUArchState *acpu;                                                   \
>> -    bool hit = false;                                                     \
>>                                                                            \
>>      CPU_FOREACH(cpu) {                                                    \
>>          acpu = (CPUArchState *)cpu->env_ptr;                              \
>>          if (cpu != current_cpu && acpu->excl_protected_hwaddr == addr) {  \
>> -            hit = true;                                                   \
>> -            break;                                                        \
>> +            acpu->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;           \
>>          }                                                                 \
>>      }                                                                     \
>> -                                                                          \
>> -    hit;                                                                  \
>>  })
>
> My comment about using an inline function in the earlier patch stands.
>
>>
>>  #ifndef SOFTMMU_CODE_ACCESS
>> @@ -439,18 +442,52 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>>           * exclusive-protected memory. */
>>          hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
>>
>> -        bool set_to_dirty;
>> -
>>          /* Two cases of invalidation: the current vCPU is writing to another
>>           * vCPU's exclusive address or the vCPU that issued the LoadLink is
>>           * writing to it, but not through a StoreCond. */
>> -        set_to_dirty = lookup_cpus_ll_addr(hw_addr);
>> -        set_to_dirty |= env->ll_sc_context &&
>> -                        (env->excl_protected_hwaddr == hw_addr);
>> +        qemu_mutex_lock(&tcg_excl_access_lock);
>> +
>> +        /* The macro lookup_cpus_ll_addr could have reset the exclusive
>> +         * address. Fail the SC in this case.
>> +         * N.B.: Here excl_succeeded == 0 means that we don't come from a
>> +         * store conditional. */
>> +        if (env->excl_succeeded &&
>> +            (env->excl_protected_hwaddr == EXCLUSIVE_RESET_ADDR)) {
>> +            env->excl_succeeded = 0;
>> +            qemu_mutex_unlock(&tcg_excl_access_lock);
>> +
>> +            return;
>> +        }
>> +
>> +        lookup_cpus_ll_addr(hw_addr);
>
> Add a comment for the side effect. I'm also confused by the above comment,
> given that we call lookup_cpus_ll_addr() after the comment about it
> tweaking excl_succeeded.

I will improve all the comments to avoid any misinterpretation. I will
also be more verbose in the cover letter about how the whole machinery
works.

>
>> +
>> +        if (!env->excl_succeeded) {
>> +            if (env->ll_sc_context &&
>> +                (env->excl_protected_hwaddr == hw_addr)) {
>> +                cpu_physical_memory_set_excl_dirty(hw_addr);
>> +            }
>> +        } else {
>> +            if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
>> +                env->excl_protected_hwaddr != hw_addr) {
>> +                env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
>> +                qemu_mutex_unlock(&tcg_excl_access_lock);
>> +                env->excl_succeeded = 0;
>> +
>> +                return;
>> +            }
>> +        }
>
> I'm wondering if it can be more naturally written the other way round to
> aid comprehension:
>
>     if (env->excl_succeeded) {
>         if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
>             env->excl_protected_hwaddr != hw_addr) {
>             ..do stuff..
>             return
>         }
>     } else {
>         if (env->ll_sc_context &&
>             (env->excl_protected_hwaddr == hw_addr)) {
>             cpu_physical_memory_set_excl_dirty(hw_addr);
>         }
>     }
>
> Although now I'm confused as to why we push on in 3 of the 4 cases.

I will try to find another way to write this code, starting from adding
more comments about the corner cases that we are addressing.

>
>> +
>> +        haddr = addr + env->tlb_table[mmu_idx][index].addend;
>> +    #if DATA_SIZE == 1
>> +        glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
>> +    #else
>> +        glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
>> +    #endif
>> +
>> +        env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
>> +        qemu_mutex_unlock(&tcg_excl_access_lock);
>>
>> -        if (set_to_dirty) {
>> -            cpu_physical_memory_set_excl_dirty(hw_addr);
>> -        } /* the vCPU is legitimately writing to the protected address */
>> +        return;
>>      } else {
>>          if ((addr & (DATA_SIZE - 1)) != 0) {
>>              goto do_unaligned_access;
>> @@ -537,18 +574,49 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
>>           * exclusive-protected memory. */
>>          hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
>>
>> -        bool set_to_dirty;
>> -
>>          /* Two cases of invalidation: the current vCPU is writing to another
>>           * vCPU's exclusive address or the vCPU that issued the LoadLink is
>>           * writing to it, but not through a StoreCond. */
>> -        set_to_dirty = lookup_cpus_ll_addr(hw_addr);
>> -        set_to_dirty |= env->ll_sc_context &&
>> -                        (env->excl_protected_hwaddr == hw_addr);
>> +        qemu_mutex_lock(&tcg_excl_access_lock);
>> +
>> +        /* The macro lookup_cpus_ll_addr could have reset the exclusive
>> +         * address. Fail the SC in this case.
>> +         * N.B.: Here excl_succeeded == 0 means that we don't come from a
>> +         * store conditional. */
>> +        if (env->excl_succeeded &&
>> +            (env->excl_protected_hwaddr == EXCLUSIVE_RESET_ADDR)) {
>> +            env->excl_succeeded = 0;
>> +            qemu_mutex_unlock(&tcg_excl_access_lock);
>> +
>> +            return;
>> +        }
>> +
>> +        lookup_cpus_ll_addr(hw_addr);
>> +
>> +        if (!env->excl_succeeded) {
>> +            if (env->ll_sc_context &&
>> +                (env->excl_protected_hwaddr == hw_addr)) {
>> +                cpu_physical_memory_set_excl_dirty(hw_addr);
>> +            }
>> +        } else {
>> +            if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
>> +                env->excl_protected_hwaddr != hw_addr) {
>> +                env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
>> +                qemu_mutex_unlock(&tcg_excl_access_lock);
>> +                env->excl_succeeded = 0;
>> +
>> +                return;
>> +            }
>> +        }
>
> Given the amount of copy/paste between the two functions I wonder if
> there is some commonality to be re-factored out here?

Either we create inline functions outside this file (like in cputlb.c) or
we define new macros. I don't see other viable ways to re-factor this
code.

Thank you,
alvise

>
>>
>> -        if (set_to_dirty) {
>> -            cpu_physical_memory_set_excl_dirty(hw_addr);
>> -        } /* the vCPU is legitimately writing to the protected address */
>> +        haddr = addr + env->tlb_table[mmu_idx][index].addend;
>> +
>> +        glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
>> +
>> +        qemu_mutex_unlock(&tcg_excl_access_lock);
>> +        env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
>> +
>> +        return;
>>      } else {
>>          if ((addr & (DATA_SIZE - 1)) != 0) {
>>              goto do_unaligned_access;
>
> --
> Alex Bennée
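One shape that refactoring could take, sketched below with a hypothetical
name and return convention: hoist the locked invalidation and dirty-check
logic shared by helper_le_st_name and helper_be_st_name into one helper,
leaving only the byte-order-specific store in the template. On the success
path the caller would still hold tcg_excl_access_lock and be expected to
perform the store, reset excl_protected_hwaddr and unlock, exactly as the
patch does today:

    /* Hypothetical common helper for the exclusive-write path. Returns
     * true if the caller should perform the store (with
     * tcg_excl_access_lock still held); false if the store conditional
     * failed and the lock has been released. */
    static inline bool excl_write_prepare(CPUArchState *env, hwaddr hw_addr)
    {
        qemu_mutex_lock(&tcg_excl_access_lock);

        /* Another vCPU already reset our exclusive address: fail the SC.
         * excl_succeeded == 0 means we don't come from a store
         * conditional. */
        if (env->excl_succeeded &&
            (env->excl_protected_hwaddr == EXCLUSIVE_RESET_ADDR)) {
            env->excl_succeeded = 0;
            qemu_mutex_unlock(&tcg_excl_access_lock);
            return false;
        }

        /* Side effect: resets any other vCPU's excl_protected_hwaddr that
         * matches hw_addr, making their pending SC fail. */
        lookup_cpus_ll_addr(hw_addr);

        if (env->excl_succeeded) {
            /* Store conditional: fail on a dirty or mismatching address. */
            if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
                env->excl_protected_hwaddr != hw_addr) {
                env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
                env->excl_succeeded = 0;
                qemu_mutex_unlock(&tcg_excl_access_lock);
                return false;
            }
        } else if (env->ll_sc_context &&
                   (env->excl_protected_hwaddr == hw_addr)) {
            /* Plain store from the LL-issuing vCPU: mark the page dirty. */
            cpu_physical_memory_set_excl_dirty(hw_addr);
        }

        return true;
    }

This also folds in Alex's suggested inversion of the if/else, so the SC
failure cases read together.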
diff --git a/softmmu_template.h b/softmmu_template.h
index bc767f6..522454f 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -141,21 +141,24 @@
             vidx >= 0;                                                    \
         })
 
+#define EXCLUSIVE_RESET_ADDR ULLONG_MAX
+
+/* This macro requires the caller to have the tcg_excl_access_lock lock since
+ * it modifies the excl_protected_hwaddr of a running vCPU.
+ * The macros scans all the excl_protected_hwaddr of all the vCPUs and compare
+ * them with the address the current vCPU is writing to. If there is a match,
+ * we reset the value, making the SC fail. */
 #define lookup_cpus_ll_addr(addr)                                         \
 ({                                                                        \
     CPUState *cpu;                                                        \
     CPUArchState *acpu;                                                   \
-    bool hit = false;                                                     \
                                                                           \
     CPU_FOREACH(cpu) {                                                    \
         acpu = (CPUArchState *)cpu->env_ptr;                              \
         if (cpu != current_cpu && acpu->excl_protected_hwaddr == addr) {  \
-            hit = true;                                                   \
-            break;                                                        \
+            acpu->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;           \
         }                                                                 \
     }                                                                     \
-                                                                          \
-    hit;                                                                  \
 })
 
 #ifndef SOFTMMU_CODE_ACCESS
@@ -439,18 +442,52 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
          * exclusive-protected memory. */
         hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
 
-        bool set_to_dirty;
-
         /* Two cases of invalidation: the current vCPU is writing to another
          * vCPU's exclusive address or the vCPU that issued the LoadLink is
          * writing to it, but not through a StoreCond. */
-        set_to_dirty = lookup_cpus_ll_addr(hw_addr);
-        set_to_dirty |= env->ll_sc_context &&
-                        (env->excl_protected_hwaddr == hw_addr);
+        qemu_mutex_lock(&tcg_excl_access_lock);
+
+        /* The macro lookup_cpus_ll_addr could have reset the exclusive
+         * address. Fail the SC in this case.
+         * N.B.: Here excl_succeeded == 0 means that we don't come from a
+         * store conditional. */
+        if (env->excl_succeeded &&
+            (env->excl_protected_hwaddr == EXCLUSIVE_RESET_ADDR)) {
+            env->excl_succeeded = 0;
+            qemu_mutex_unlock(&tcg_excl_access_lock);
+
+            return;
+        }
+
+        lookup_cpus_ll_addr(hw_addr);
+
+        if (!env->excl_succeeded) {
+            if (env->ll_sc_context &&
+                (env->excl_protected_hwaddr == hw_addr)) {
+                cpu_physical_memory_set_excl_dirty(hw_addr);
+            }
+        } else {
+            if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
+                env->excl_protected_hwaddr != hw_addr) {
+                env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
+                qemu_mutex_unlock(&tcg_excl_access_lock);
+                env->excl_succeeded = 0;
+
+                return;
+            }
+        }
+
+        haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    #if DATA_SIZE == 1
+        glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
+    #else
+        glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
+    #endif
+
+        env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
+        qemu_mutex_unlock(&tcg_excl_access_lock);
 
-        if (set_to_dirty) {
-            cpu_physical_memory_set_excl_dirty(hw_addr);
-        } /* the vCPU is legitimately writing to the protected address */
+        return;
     } else {
         if ((addr & (DATA_SIZE - 1)) != 0) {
             goto do_unaligned_access;
@@ -537,18 +574,49 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
          * exclusive-protected memory. */
         hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
 
-        bool set_to_dirty;
-
         /* Two cases of invalidation: the current vCPU is writing to another
          * vCPU's exclusive address or the vCPU that issued the LoadLink is
          * writing to it, but not through a StoreCond. */
-        set_to_dirty = lookup_cpus_ll_addr(hw_addr);
-        set_to_dirty |= env->ll_sc_context &&
-                        (env->excl_protected_hwaddr == hw_addr);
+        qemu_mutex_lock(&tcg_excl_access_lock);
+
+        /* The macro lookup_cpus_ll_addr could have reset the exclusive
+         * address. Fail the SC in this case.
+         * N.B.: Here excl_succeeded == 0 means that we don't come from a
+         * store conditional. */
+        if (env->excl_succeeded &&
+            (env->excl_protected_hwaddr == EXCLUSIVE_RESET_ADDR)) {
+            env->excl_succeeded = 0;
+            qemu_mutex_unlock(&tcg_excl_access_lock);
+
+            return;
+        }
+
+        lookup_cpus_ll_addr(hw_addr);
+
+        if (!env->excl_succeeded) {
+            if (env->ll_sc_context &&
+                (env->excl_protected_hwaddr == hw_addr)) {
+                cpu_physical_memory_set_excl_dirty(hw_addr);
+            }
+        } else {
+            if (cpu_physical_memory_excl_is_dirty(hw_addr) ||
+                env->excl_protected_hwaddr != hw_addr) {
+                env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
+                qemu_mutex_unlock(&tcg_excl_access_lock);
+                env->excl_succeeded = 0;
+
+                return;
+            }
+        }
 
-        if (set_to_dirty) {
-            cpu_physical_memory_set_excl_dirty(hw_addr);
-        } /* the vCPU is legitimately writing to the protected address */
+        haddr = addr + env->tlb_table[mmu_idx][index].addend;
+
+        glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
+
+        qemu_mutex_unlock(&tcg_excl_access_lock);
+        env->excl_protected_hwaddr = EXCLUSIVE_RESET_ADDR;
+
+        return;
     } else {
         if ((addr & (DATA_SIZE - 1)) != 0) {
             goto do_unaligned_access;
Exploiting the tcg_excl_access_lock, port the helper_{le,be}_st_name to
work in real multithreading.

- The macro lookup_cpus_ll_addr now uses directly the
  env->excl_protected_addr to invalidate others' LL/SC operations

Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
---
 softmmu_template.h | 110 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 89 insertions(+), 21 deletions(-)
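The patched store helpers are only half of the protocol: they assume a
store-conditional front end that sets env->excl_succeeded before the store
and then checks whether it survived. A rough sketch of that front end,
with hypothetical names (do_storecond() and excl_store() are invented for
illustration; the real series wires this up in per-target helpers):

    /* Hypothetical SC front end. excl_store() stands in for the generated
     * helper_{le,be}_st*_mmu store helper patched above, which clears
     * env->excl_succeeded when the store conditional must fail. */
    static uint32_t do_storecond(CPUArchState *env, target_ulong addr,
                                 uint32_t val, int mmu_idx, uintptr_t retaddr)
    {
        uint32_t failed;

        env->excl_succeeded = 1;        /* mark this store as an SC */
        excl_store(env, addr, val, mmu_idx, retaddr);

        failed = !env->excl_succeeded;  /* cleared by the helper on failure */
        env->excl_succeeded = 0;
        return failed;                  /* 0: SC succeeded, 1: SC failed */
    }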