@@ -7197,6 +7197,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
/* misc */
case 0x90: /* nop */
/* XXX: correct lock test for all insn */
+ fprintf(stderr, "nop @ %016lx\n", pc_start);
if (prefixes & PREFIX_LOCK) {
goto illegal_op;
}
-------------------------------[ debug patch]---------------------------
The output it produces leading up to the abort quoted above is:
nop @ 00000000ffffffe4
nop @ 00000000ffffffe5
nop @ 00000000ffffffe6
nop @ 00000000ffffffe7
nop @ 00000000fffffff5
nop @ 00000000fffffff6
nop @ 00000000fffffff7
nop @ 00000000fffffff8
nop @ 00000000fffffff9
nop @ 00000000fffffffa
nop @ 00000000fffffffb
nop @ 00000000fffffffc
nop @ 00000000fffffffd
nop @ 00000000fffffffe
nop @ 00000000ffffffff
qemu: fatal: Trying to execute code outside RAM or ROM at 0x0000000100000000
Hence "nop slide".
Peeking into the coredump triggered by abort(), the backtrace is as follows:
#0 0x00007fd53c02b8a5 in raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
#1 0x00007fd53c02d085 in abort () at abort.c:92
#2 0x00007fd5428d0333 in cpu_abort (env=0x7fd544b89c10, fmt=
0x7fd542a47440 "Trying to execute code outside RAM or ROM at 0x%016lx\n")
at /home/lacos/src/upstream/qemu/exec.c:542
#3 0x00007fd5428c9aa4 in get_page_addr_code (env1=0x7fd544b89c10, addr=4294967296)
at /home/lacos/src/upstream/qemu/cputlb.c:338
#4 0x00007fd5429de266 in tb_gen_code (env=0x7fd544b89c10, pc=4294967268, cs_base=0, flags=4244148, cflags=0)
at /home/lacos/src/upstream/qemu/translate-all.c:966
#5 0x00007fd5428c431b in tb_find_slow (env=0x7fd544b89c10, pc=4294967268, cs_base=0, flags=4244148)
at /home/lacos/src/upstream/qemu/cpu-exec.c:139
#6 0x00007fd5428c44c4 in tb_find_fast (env=0x7fd544b89c10) at /home/lacos/src/upstream/qemu/cpu-exec.c:166
#7 0x00007fd5428c4c78 in cpu_x86_exec (env=0x7fd544b89c10) at /home/lacos/src/upstream/qemu/cpu-exec.c:593
#8 0x00007fd5428c8058 in tcg_cpu_exec (env=0x7fd544b89c10) at /home/lacos/src/upstream/qemu/cpus.c:1144
#9 0x00007fd5428c81a3 in tcg_exec_all () at /home/lacos/src/upstream/qemu/cpus.c:1177
#10 0x00007fd5428c7321 in qemu_tcg_cpu_thread_fn (arg=0x7fd544b89b00) at /home/lacos/src/upstream/qemu/cpus.c:844
#11 0x00007fd540ab4851 in start_thread (arg=0x7fd51a5e7700) at pthread_create.c:301
#12 0x00007fd53c0e190d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:115
Frames #4 and #3 are interesting, I think they explain the PC values
we're seeing: we're at 0xffffffe4 (frame #4, tb_gen_code(),
pc=4294967268):
http://git.qemu.org/?p=qemu.git;a=blob;f=translate-all.c;h=211be314cbeea7c076031a3c0abe8c0ada62152b;hb=HEAD#l933
The call to frame #3 occurs on line 966.
This is what happens in this function (I think!)
tb_gen_code() [translate-all.c]
cpu_gen_code() [called from translate-all.c:958]
gen_intermediate_code() [target-i386/translate.c]
gen_intermediate_code_internal()
disas_insn()
get_page_addr_code() [called from translate-all.c:966]
disas_insn() is the function that prints the NOP slide (one NOP at a
time), and it is also the function that commit 4a6fd938, bisected by
Duane, modifies.
So, basically tb_gen_code() generates a block of code ("translation
block" I suppose) with cpu_gen_code(), then checks if the generated
block still fits on the same page (see the assignment to "virt_page2"
and below, under the git link).
According to gdb, the translation block generated for this sequence of
NOPs starting at 0xffffffe4 has size 140. Consequently it hangs over
into the next page (virt_page2 == 4GB, second arg for frame #3,
get_page_addr_code()), and this triggers the abort.
The translated code is as follows:
(gdb) frame 4
#4 0x00007fd5429de266 in tb_gen_code (env=0x7fd544b89c10, pc=4294967268, cs_base=0, flags=4244148, cflags=0)
at /home/lacos/src/upstream/qemu/translate-all.c:966
966 phys_page2 = get_page_addr_code(env, virt_page2);
(gdb) print *tb
$1 = {pc = 4294967268, cs_base = 0, flags = 4244148, size = 140, cflags
= 0, tc_ptr = 0x7fd52a161e30 "A\213nÈ\205í\017\205è\031",
phys_hash_next = 0x0, page_next = {0x0, 0x0}, page_addr = {0, 0},
tb_next_offset = {65535, 65535}, tb_jmp_offset = {0, 0}, jmp_next =
{0x0, 0x0}, jmp_first = 0x0, icount = 76}
(gdb) x/140xb tb->tc_ptr
0x7fd52a161e30 <code_gen_buffer+12033584>: 0x41 0x8b 0x6e 0xc8 0x85 0xed 0x0f 0x85
0x7fd52a161e38 <code_gen_buffer+12033592>: 0xe8 0x19 0x00 0x00 0x49 0x8b 0x2e 0x48
0x7fd52a161e40 <code_gen_buffer+12033600>: 0x8b 0xfd 0x48 0x8b 0xf5 0x48 0xc1 0xef
0x7fd52a161e48 <code_gen_buffer+12033608>: 0x07 0x48 0x81 0xe6 0x00 0xf0 0xff 0xff
0x7fd52a161e50 <code_gen_buffer+12033616>: 0x81 0xe7 0xe0 0x1f 0x00 0x00 0x49 0x8d
0x7fd52a161e58 <code_gen_buffer+12033624>: 0xbc 0x3e 0x28 0x05 0x00 0x00 0x48 0x3b
0x7fd52a161e60 <code_gen_buffer+12033632>: 0x37 0x48 0x8b 0xf5 0x0f 0x85 0xc9 0x19
0x7fd52a161e68 <code_gen_buffer+12033640>: 0x00 0x00 0x48 0x03 0x77 0x18 0x0f 0xb6
0x7fd52a161e70 <code_gen_buffer+12033648>: 0x1e 0x48 0x03 0xdd 0x48 0x8b 0xfd 0x48
0x7fd52a161e78 <code_gen_buffer+12033656>: 0x8b 0xf5 0x48 0xc1 0xef 0x07 0x48 0x81
0x7fd52a161e80 <code_gen_buffer+12033664>: 0xe6 0x00 0xf0 0xff 0xff 0x81 0xe7 0xe0
0x7fd52a161e88 <code_gen_buffer+12033672>: 0x1f 0x00 0x00 0x49 0x8d 0xbc 0x3e 0x30
0x7fd52a161e90 <code_gen_buffer+12033680>: 0x05 0x00 0x00 0x48 0x3b 0x37 0x48 0x8b
0x7fd52a161e98 <code_gen_buffer+12033688>: 0xf5 0x0f 0x85 0xad 0x19 0x00 0x00 0x48
0x7fd52a161ea0 <code_gen_buffer+12033696>: 0x03 0x77 0x10 0x88 0x1e 0x4c 0x8b 0xe5
0x7fd52a161ea8 <code_gen_buffer+12033704>: 0x4d 0x89 0xa6 0x98 0x00 0x00 0x00 0x49
0x7fd52a161eb0 <code_gen_buffer+12033712>: 0x89 0x9e 0x90 0x00 0x00 0x00 0x48 0x8b
0x7fd52a161eb8 <code_gen_buffer+12033720>: 0xfd 0x48 0x8b 0xf5
(gdb) x/140i tb->tc_ptr
0x7fd52a161e30 <code_gen_buffer+12033584>: mov -0x38(%r14),%ebp
0x7fd52a161e34 <code_gen_buffer+12033588>: test %ebp,%ebp
0x7fd52a161e36 <code_gen_buffer+12033590>: jne 0x7fd52a163824 <code_gen_buffer+12040228>
0x7fd52a161e3c <code_gen_buffer+12033596>: mov (%r14),%rbp
0x7fd52a161e3f <code_gen_buffer+12033599>: mov %rbp,%rdi
0x7fd52a161e42 <code_gen_buffer+12033602>: mov %rbp,%rsi
0x7fd52a161e45 <code_gen_buffer+12033605>: shr $0x7,%rdi
0x7fd52a161e49 <code_gen_buffer+12033609>: and $0xfffffffffffff000,%rsi
0x7fd52a161e50 <code_gen_buffer+12033616>: and $0x1fe0,%edi
0x7fd52a161e56 <code_gen_buffer+12033622>: lea 0x528(%r14,%rdi,1),%rdi
0x7fd52a161e5e <code_gen_buffer+12033630>: cmp (%rdi),%rsi
0x7fd52a161e61 <code_gen_buffer+12033633>: mov %rbp,%rsi
0x7fd52a161e64 <code_gen_buffer+12033636>: jne 0x7fd52a163833 <code_gen_buffer+12040243>
0x7fd52a161e6a <code_gen_buffer+12033642>: add 0x18(%rdi),%rsi
0x7fd52a161e6e <code_gen_buffer+12033646>: movzbl (%rsi),%ebx
0x7fd52a161e71 <code_gen_buffer+12033649>: add %rbp,%rbx
0x7fd52a161e74 <code_gen_buffer+12033652>: mov %rbp,%rdi
0x7fd52a161e77 <code_gen_buffer+12033655>: mov %rbp,%rsi
0x7fd52a161e7a <code_gen_buffer+12033658>: shr $0x7,%rdi
0x7fd52a161e7e <code_gen_buffer+12033662>: and $0xfffffffffffff000,%rsi
0x7fd52a161e85 <code_gen_buffer+12033669>: and $0x1fe0,%edi
0x7fd52a161e8b <code_gen_buffer+12033675>: lea 0x530(%r14,%rdi,1),%rdi
0x7fd52a161e93 <code_gen_buffer+12033683>: cmp (%rdi),%rsi
0x7fd52a161e96 <code_gen_buffer+12033686>: mov %rbp,%rsi
0x7fd52a161e99 <code_gen_buffer+12033689>: jne 0x7fd52a16384c <code_gen_buffer+12040268>
0x7fd52a161e9f <code_gen_buffer+12033695>: add 0x10(%rdi),%rsi
0x7fd52a161ea3 <code_gen_buffer+12033699>: mov %bl,(%rsi)
0x7fd52a161ea5 <code_gen_buffer+12033701>: mov %rbp,%r12
0x7fd52a161ea8 <code_gen_buffer+12033704>: mov %r12,0x98(%r14)
0x7fd52a161eaf <code_gen_buffer+12033711>: mov %rbx,0x90(%r14)
0x7fd52a161eb6 <code_gen_buffer+12033718>: mov %rbp,%rdi
0x7fd52a161eb9 <code_gen_buffer+12033721>: mov %rbp,%rsi
No clue what's going on!
Interestingly, when I start OVMF, qemu actually runs it for a pretty
long time; OVMF produces a bunch of its usual debug output in
"debug.log". The last debug message issued before the abort is
InstallProtocolInterface: 7AB33A91-ACE5-4326-B572-E7EE33D39F16 3E9ED340
which corresponds to Managed Network Protocol being installed (probably
on top of the direct NIC driver, a Simple Network Protocol
implementation). The NIC driver (= SNP) is loaded from the EFI oprom
"pc-bios/efi-e1000.rom" (e1000 is the default NIC model on the PC
target).
If I add "-net none" to the quoted command line, there's no nop slide at
0xffffffe4, nor a crash, but the expected OVMF setup UI doesn't appear
either.
If I add
-netdev user,id=net0 \
-device e1000,netdev=net0,romfile=/dev/null
(OVMF finds no driver for the e1000 NIC), it has the same effect as -net
note -- no crash, no UI.
Changing the NIC model to virtio-net-pci, and allowing qemu to load its
oprom driver ("pc-bios/efi-virtio.rom"), that is, adding:
-netdev user,id=net0 \
-device virtio-net-pci,netdev=net0
the VM runs in an endless reboot loop rather than crashing, and yes it
reboots in the same place, after the Managed Network Protocol is
installed.
Lastly, the options
-netdev user,id=net0 \
-device virtio-net-pci,netdev=net0,romfile=/dev/null
disable the iPXE driver in "pc-bios/efi-virtio.rom", and allow my
VirtioNetDxe driver (embedded in my OVMF build and pending on the
edk2-devel list, but not yet merged) to drive the virtio NIC. In this
case:
- Managed Network Protocol is installed OK (according to debug.log),
- no crash, no UI -- same as with "-net none" and with
"e1000,romfile=/dev/null".
So OVMF loading an iPXE driver from the NIC ROM bar interacts badly with
commit 4a6fd938.
... I know zilch about instruction decoding, but commit 4a6fd938 seems
to change the effect of PREFIX_ADR. Consider
TARGET_X86_64 && CODE64(s) && (prefixes & PREFIX_ADR)
- before the patch, "aflag" was not changed (from the original s->code32),
- after the patch, the least significant bit of "aflag" is inverted.
... I can't believe it! The patch below fixes it for me! OVMF boots and
I'm dropped to the EFI shell, after which I can also access the setup
UI.
------------------------------[ proposed fix ]--------------------------
@@ -4813,7 +4813,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
/* 0x66 is ignored if rex.w is set */
dflag = 2;
}
- if (!(prefixes & PREFIX_ADR)) {
+ if (prefixes & PREFIX_ADR) {
+ /* flip it back, 0x67 should have no effect */
+ aflag ^= 1;
+ }
+ else {
aflag = 2;
}
}