@@ -2361,6 +2361,14 @@ void cpu_loop (CPUState *env)
that the intr_flag should be cleared. */
env->intr_flag = 0;
+ /* Similarly with lock_addr, where "clear" is -1 here.
+ ??? Note that it's not possible to single-step through
+ a ldl_l/stl_c sequence on real hardware, but let's see
+ if we can do better than that in the emulator. */
+ if (trapnr != EXCP_DEBUG) {
+ env->lock_addr = -1;
+ }
+
switch (trapnr) {
case EXCP_RESET:
fprintf(stderr, "Reset requested. Exit\n");
@@ -2512,6 +2520,9 @@ void cpu_loop (CPUState *env)
case EXCP_DEBUG:
info.si_signo = gdb_handlesig (env, TARGET_SIGTRAP);
if (info.si_signo) {
+ /* ??? See above re single-stepping and ldl_l. If we're
+ actually going to deliver a signal, break the lock. */
+ env->lock_addr = -1;
info.si_errno = 0;
info.si_code = TARGET_TRAP_BRKPT;
queue_signal(env, info.si_signo, &info);
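
For context (not part of the patch itself): the guest-side pattern the
hunks above must keep working is the classic Alpha load-locked /
store-conditional retry loop, sketched below after the Linux kernel's
arch/alpha atomic_add. Any exception delivered between the ldl_l and the
stl_c has to break the lock so that the stl_c fails and the guest retries:

/* Illustrative guest code only; names and constraints follow the
   kernel's atomic_add.  If the emulator delivered a signal between
   ldl_l and stl_c without breaking the lock, the stl_c could succeed
   against a value the signal handler has since modified. */
static inline void atomic_add(int i, int *counter)
{
    unsigned long temp;
    __asm__ __volatile__(
    "1: ldl_l %0,%1\n"          /* load-locked: starts the sequence */
    "   addl  %0,%2,%0\n"
    "   stl_c %0,%1\n"          /* %0 becomes 0 if the lock was broken */
    "   beq   %0,1b"            /* retry on failure */
    : "=&r" (temp), "=m" (*counter)
    : "Ir" (i), "m" (*counter));
}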
@@ -355,11 +355,12 @@ struct CPUAlphaState {
uint64_t ir[31];
float64 fir[31];
uint64_t pc;
- uint64_t lock;
uint32_t pcc[2];
uint64_t ipr[IPR_LAST];
uint64_t ps;
uint64_t unique;
+ uint64_t lock_addr;
+ uint64_t lock_value;
float_status fp_status;
/* The following fields make up the FPCR, but in FP_STATUS format. */
uint8_t fpcr_exc_status;
@@ -556,12 +556,14 @@ void cpu_dump_state (CPUState *env, FILE *f,
if ((i % 3) == 2)
cpu_fprintf(f, "\n");
}
- cpu_fprintf(f, "\n");
+
+ cpu_fprintf(f, "lock_a " TARGET_FMT_lx " lock_v " TARGET_FMT_lx "\n",
+ env->lock_addr, env->lock_value);
+
for (i = 0; i < 31; i++) {
cpu_fprintf(f, "FIR%02d " TARGET_FMT_lx " ", i,
*((uint64_t *)(&env->fir[i])));
if ((i % 3) == 2)
cpu_fprintf(f, "\n");
}
- cpu_fprintf(f, "\nlock " TARGET_FMT_lx "\n", env->lock);
}
@@ -99,6 +99,9 @@ DEF_HELPER_1(ieee_input, i64, i64)
DEF_HELPER_1(ieee_input_cmp, i64, i64)
DEF_HELPER_1(ieee_input_s, i64, i64)
+DEF_HELPER_2(stl_c, i64, i64, i64)
+DEF_HELPER_2(stq_c, i64, i64, i64)
+
#if !defined (CONFIG_USER_ONLY)
DEF_HELPER_0(hw_rei, void)
DEF_HELPER_1(hw_ret, void, i64)
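
An aside for readers new to QEMU's helper machinery (an approximate
expansion for illustration, not part of the patch): DEF_HELPER_2(name,
ret, arg1, arg2) is expanded by def-helper.h into roughly a C prototype
plus a gen_helper_* emitter for use from the translator:

/* Rough expansion of DEF_HELPER_2(stl_c, i64, i64, i64); the real
   macros also register the helper with TCG. */
uint64_t helper_stl_c(uint64_t addr, uint64_t val);
/* and, callable from translate.c:
   void gen_helper_stl_c(TCGv_i64 ret, TCGv_i64 addr, TCGv_i64 val); */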
@@ -1152,6 +1152,74 @@ uint64_t helper_cvtqg (uint64_t a)
return float64_to_g(fr);
}
+uint64_t helper_stl_c (uint64_t addr, uint64_t val)
+{
+ uint64_t ret = 0;
+
+ /* The resultant virtual and physical address must specify a location
+ within the same naturally aligned 16-byte block as the preceding
+ load instruction. Note that (1) we're only checking the physical
+ address here, since ADDR has already been through virt_to_phys,
+ and (2) LOCK_ADDR has already been masked with -16. We do this
+ ahead of time so that we can assign LOCK_ADDR = -1 to force a
+ failure here. */
+ /* ??? The "16" in the spec is no doubt the minimum architectural
+ cacheline size. If we are attempting to model the real hardware
+ implementation, we should probably expand this to the real cache
+ line size. But this is certainly good enough for now. */
+ if ((addr & -16) == env->lock_addr) {
+ int32_t *host_addr;
+
+#if defined(CONFIG_USER_ONLY)
+ host_addr = (int32_t *)addr;
+#else
+ /* FIXME */
+ cpu_abort(env, "stl_c outside of CONFIG_USER_ONLY");
+#endif
+
+ /* Emulate the ldl_l/stl_c pair with a compare-and-swap. */
+ ret = __sync_bool_compare_and_swap(host_addr,
+ (int32_t)env->lock_value,
+ (int32_t)val);
+ }
+
+ env->lock_addr = -1;
+ return ret;
+}
+
+uint64_t helper_stq_c (uint64_t addr, uint64_t val)
+{
+ uint64_t ret = 0;
+
+ /* The resultant virtual and physical address must specify a location
+ within the same naturally aligned 16-byte block as the preceding
+ load instruction. Note that (1) we're only checking the physical
+ address here, since ADDR has already been through virt_to_phys,
+ and (2) LOCK_ADDR has already been masked with -16. We do this
+ ahead of time so that we can assign LOCK_ADDR = -1 to force a
+ failure here. */
+ /* ??? The "16" in the spec is no doubt the minimum architectural
+ cacheline size. If we are attempting to model the real hardware
+ implementation, we should probably expand this to the real cache
+ line size. But this is certainly good enough for now. */
+ if ((addr & -16) == env->lock_addr) {
+ uint64_t *host_addr;
+
+#if defined(CONFIG_USER_ONLY)
+ host_addr = (uint64_t *)addr;
+#else
+ /* FIXME */
+ cpu_abort(env, "stq_c outside of CONFIG_USER_ONLY");
+#endif
+
+ /* Emulate the ldq_l/stq_c pair with a compare-and-swap. */
+ ret = __sync_bool_compare_and_swap(host_addr, env->lock_value, val);
+ }
+
+ env->lock_addr = -1;
+ return ret;
+}
+
/* PALcode support special instructions */
#if !defined (CONFIG_USER_ONLY)
void helper_hw_rei (void)
@@ -1159,6 +1227,7 @@ void helper_hw_rei (void)
env->pc = env->ipr[IPR_EXC_ADDR] & ~3;
env->ipr[IPR_EXC_ADDR] = env->ipr[IPR_EXC_ADDR] & 1;
env->intr_flag = 0;
+ env->lock_addr = -1;
/* XXX: re-enable interrupts and memory mapping */
}
@@ -1167,6 +1236,7 @@ void helper_hw_ret (uint64_t a)
env->pc = a & ~3;
env->ipr[IPR_EXC_ADDR] = a & 1;
env->intr_flag = 0;
+ env->lock_addr = -1;
/* XXX: re-enable interrupts and memory mapping */
}
@@ -82,7 +82,8 @@ static TCGv_ptr cpu_env;
static TCGv cpu_ir[31];
static TCGv cpu_fir[31];
static TCGv cpu_pc;
-static TCGv cpu_lock;
+static TCGv cpu_lock_addr;
+static TCGv cpu_lock_value;
#ifdef CONFIG_USER_ONLY
static TCGv cpu_uniq;
#endif
@@ -119,8 +120,12 @@ static void alpha_translate_init(void)
cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
offsetof(CPUState, pc), "pc");
- cpu_lock = tcg_global_mem_new_i64(TCG_AREG0,
- offsetof(CPUState, lock), "lock");
+ cpu_lock_addr = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, lock_addr),
+ "lock_addr");
+ cpu_lock_value = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUState, lock_value),
+ "lock_value");
#ifdef CONFIG_USER_ONLY
cpu_uniq = tcg_global_mem_new_i64(TCG_AREG0,
@@ -181,16 +186,33 @@ static inline void gen_qemu_lds(TCGv t0, TCGv t1, int flags)
tcg_temp_free(tmp);
}
+static void gen_virt_to_phys(TCGv out, TCGv in, int store)
+{
+#if defined(CONFIG_USER_ONLY)
+ tcg_gen_addi_i64(out, in, GUEST_BASE);
+#else
+ if (store) {
+ gen_helper_st_virt_to_phys(out, in);
+ } else {
+ gen_helper_ld_virt_to_phys(out, in);
+ }
+#endif
+}
+
static inline void gen_qemu_ldl_l(TCGv t0, TCGv t1, int flags)
{
- tcg_gen_mov_i64(cpu_lock, t1);
tcg_gen_qemu_ld32s(t0, t1, flags);
+ gen_virt_to_phys(cpu_lock_addr, t1, 0);
+ tcg_gen_andi_i64(cpu_lock_addr, cpu_lock_addr, -16);
+ tcg_gen_mov_i64(cpu_lock_value, t0);
}
static inline void gen_qemu_ldq_l(TCGv t0, TCGv t1, int flags)
{
- tcg_gen_mov_i64(cpu_lock, t1);
tcg_gen_qemu_ld64(t0, t1, flags);
+ gen_virt_to_phys(cpu_lock_addr, t1, 0);
+ tcg_gen_andi_i64(cpu_lock_addr, cpu_lock_addr, -16);
+ tcg_gen_mov_i64(cpu_lock_value, t0);
}
static inline void gen_load_mem(DisasContext *ctx,
@@ -199,25 +221,31 @@ static inline void gen_load_mem(DisasContext *ctx,
int ra, int rb, int32_t disp16, int fp,
int clear)
{
- TCGv addr;
+ TCGv addr, va;
- if (unlikely(ra == 31))
+ /* LDQ_U with ra $31 is UNOP. Various other loads are forms of
+ prefetches, which we can treat as nops. No worries about
+ missed exceptions here. */
+ if (unlikely(ra == 31)) {
return;
+ }
addr = tcg_temp_new();
if (rb != 31) {
tcg_gen_addi_i64(addr, cpu_ir[rb], disp16);
- if (clear)
+ if (clear) {
tcg_gen_andi_i64(addr, addr, ~0x7);
+ }
} else {
- if (clear)
+ if (clear) {
disp16 &= ~0x7;
+ }
tcg_gen_movi_i64(addr, disp16);
}
- if (fp)
- tcg_gen_qemu_load(cpu_fir[ra], addr, ctx->mem_idx);
- else
- tcg_gen_qemu_load(cpu_ir[ra], addr, ctx->mem_idx);
+
+ va = (fp ? cpu_fir[ra] : cpu_ir[ra]);
+ tcg_gen_qemu_load(va, addr, ctx->mem_idx);
+
tcg_temp_free(addr);
}
@@ -253,71 +281,52 @@ static inline void gen_qemu_sts(TCGv t0, TCGv t1, int flags)
static inline void gen_qemu_stl_c(TCGv t0, TCGv t1, int flags)
{
- int l1, l2;
-
- l1 = gen_new_label();
- l2 = gen_new_label();
- tcg_gen_brcond_i64(TCG_COND_NE, cpu_lock, t1, l1);
- tcg_gen_qemu_st32(t0, t1, flags);
- tcg_gen_movi_i64(t0, 1);
- tcg_gen_br(l2);
- gen_set_label(l1);
- tcg_gen_movi_i64(t0, 0);
- gen_set_label(l2);
- tcg_gen_movi_i64(cpu_lock, -1);
+ TCGv va = tcg_temp_new();
+ gen_virt_to_phys(va, t1, 1);
+ gen_helper_stl_c(t0, va, t0);
+ tcg_temp_free(va);
}
static inline void gen_qemu_stq_c(TCGv t0, TCGv t1, int flags)
{
- int l1, l2;
-
- l1 = gen_new_label();
- l2 = gen_new_label();
- tcg_gen_brcond_i64(TCG_COND_NE, cpu_lock, t1, l1);
- tcg_gen_qemu_st64(t0, t1, flags);
- tcg_gen_movi_i64(t0, 1);
- tcg_gen_br(l2);
- gen_set_label(l1);
- tcg_gen_movi_i64(t0, 0);
- gen_set_label(l2);
- tcg_gen_movi_i64(cpu_lock, -1);
+ TCGv va = tcg_temp_new();
+ gen_virt_to_phys(va, t1, 1);
+ gen_helper_stq_c(t0, va, t0);
+ tcg_temp_free(va);
}
static inline void gen_store_mem(DisasContext *ctx,
void (*tcg_gen_qemu_store)(TCGv t0, TCGv t1,
int flags),
int ra, int rb, int32_t disp16, int fp,
- int clear, int local)
+ int clear)
{
- TCGv addr;
- if (local)
- addr = tcg_temp_local_new();
- else
- addr = tcg_temp_new();
+ TCGv addr, va;
+
+ addr = tcg_temp_new();
if (rb != 31) {
tcg_gen_addi_i64(addr, cpu_ir[rb], disp16);
- if (clear)
+ if (clear) {
tcg_gen_andi_i64(addr, addr, ~0x7);
+ }
} else {
- if (clear)
+ if (clear) {
disp16 &= ~0x7;
+ }
tcg_gen_movi_i64(addr, disp16);
}
- if (ra != 31) {
- if (fp)
- tcg_gen_qemu_store(cpu_fir[ra], addr, ctx->mem_idx);
- else
- tcg_gen_qemu_store(cpu_ir[ra], addr, ctx->mem_idx);
+
+ if (ra == 31) {
+ va = tcg_const_i64(0);
} else {
- TCGv zero;
- if (local)
- zero = tcg_const_local_i64(0);
- else
- zero = tcg_const_i64(0);
- tcg_gen_qemu_store(zero, addr, ctx->mem_idx);
- tcg_temp_free(zero);
+ va = (fp ? cpu_fir[ra] : cpu_ir[ra]);
}
+ tcg_gen_qemu_store(va, addr, ctx->mem_idx);
+
tcg_temp_free(addr);
+ if (ra == 31) {
+ tcg_temp_free(va);
+ }
}
static int use_goto_tb(DisasContext *ctx, uint64_t dest)
@@ -1530,15 +1539,15 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
break;
case 0x0D:
/* STW */
- gen_store_mem(ctx, &tcg_gen_qemu_st16, ra, rb, disp16, 0, 0, 0);
+ gen_store_mem(ctx, &tcg_gen_qemu_st16, ra, rb, disp16, 0, 0);
break;
case 0x0E:
/* STB */
- gen_store_mem(ctx, &tcg_gen_qemu_st8, ra, rb, disp16, 0, 0, 0);
+ gen_store_mem(ctx, &tcg_gen_qemu_st8, ra, rb, disp16, 0, 0);
break;
case 0x0F:
/* STQ_U */
- gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 0, 1, 0);
+ gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 0, 1);
break;
case 0x10:
switch (fn7) {
@@ -2968,19 +2977,19 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
break;
case 0x24:
/* STF */
- gen_store_mem(ctx, &gen_qemu_stf, ra, rb, disp16, 1, 0, 0);
+ gen_store_mem(ctx, &gen_qemu_stf, ra, rb, disp16, 1, 0);
break;
case 0x25:
/* STG */
- gen_store_mem(ctx, &gen_qemu_stg, ra, rb, disp16, 1, 0, 0);
+ gen_store_mem(ctx, &gen_qemu_stg, ra, rb, disp16, 1, 0);
break;
case 0x26:
/* STS */
- gen_store_mem(ctx, &gen_qemu_sts, ra, rb, disp16, 1, 0, 0);
+ gen_store_mem(ctx, &gen_qemu_sts, ra, rb, disp16, 1, 0);
break;
case 0x27:
/* STT */
- gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 1, 0, 0);
+ gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 1, 0);
break;
case 0x28:
/* LDL */
@@ -3000,19 +3009,19 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
break;
case 0x2C:
/* STL */
- gen_store_mem(ctx, &tcg_gen_qemu_st32, ra, rb, disp16, 0, 0, 0);
+ gen_store_mem(ctx, &tcg_gen_qemu_st32, ra, rb, disp16, 0, 0);
break;
case 0x2D:
/* STQ */
- gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 0, 0, 0);
+ gen_store_mem(ctx, &tcg_gen_qemu_st64, ra, rb, disp16, 0, 0);
break;
case 0x2E:
/* STL_C */
- gen_store_mem(ctx, &gen_qemu_stl_c, ra, rb, disp16, 0, 0, 1);
+ gen_store_mem(ctx, &gen_qemu_stl_c, ra, rb, disp16, 0, 0);
break;
case 0x2F:
/* STQ_C */
- gen_store_mem(ctx, &gen_qemu_stq_c, ra, rb, disp16, 0, 0, 1);
+ gen_store_mem(ctx, &gen_qemu_stq_c, ra, rb, disp16, 0, 0);
break;
case 0x30:
/* BR */
@@ -3279,6 +3288,7 @@ CPUAlphaState * cpu_alpha_init (const char *cpu_model)
#else
pal_init(env);
#endif
+ env->lock_addr = -1;
/* Initialize IPR */
#if defined (CONFIG_USER_ONLY)
Use __sync_bool_compare_and_swap to yield correctly atomic results.  As
yet, this assumes running on a strict-memory-ordering host (i.e. x86),
since we're still "implementing" the memory-barrier instructions as nops.

Rename the "lock" cpu field to "lock_addr" and add a "lock_value" field
to be used as the "old" value for the compare-and-swap.  Use -1 in the
lock_addr field to indicate no lock held.  Break the lock when processing
any sort of exception.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 linux-user/main.c        |   11 ++++
 target-alpha/cpu.h       |    3 +-
 target-alpha/helper.c    |    6 +-
 target-alpha/helper.h    |    3 +
 target-alpha/op_helper.c |   70 ++++++++++++++++++++++
 target-alpha/translate.c |  146 ++++++++++++++++++++++++---------------------
 6 files changed, 168 insertions(+), 71 deletions(-)
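
One caveat worth spelling out about the technique (an observation, not a
claim made by the patch): a value-based compare-and-swap cannot detect an
A-B-A sequence, whereas hardware stl_c fails if any store hit the locked
block. A sequential sketch of the window, with hypothetical values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    int32_t mem = 42;
    int32_t lock_value = mem;   /* emulated ldl_l records 42 */

    mem = 7;                    /* another CPU stores to the block... */
    mem = 42;                   /* ...then restores the old value */

    /* Emulated stl_c succeeds where real hardware would fail. */
    int ok = __sync_bool_compare_and_swap(&mem, lock_value, 43);
    printf("cas ok=%d mem=%d\n", ok, mem);   /* prints: cas ok=1 mem=43 */
    return 0;
}

For the locking and atomic-arithmetic idioms guests actually emit, a
spurious success on an unchanged value is still linearizable, so this is
benign in practice.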