diff mbox series

[v6,7/7] tests/tcg/x86_64: add test for plugin memory access

Message ID 20240706191335.878142-8-pierrick.bouvier@linaro.org
State New
Headers show
Series plugins: access values during a memory read/write | expand

Commit Message

Pierrick Bouvier July 6, 2024, 7:13 p.m. UTC
Add an explicit test to check expected memory values are read/written.
For sizes 8, 16, 32, 64 and 128, we generate a load/store operation.
For size 8 -> 64, we generate an atomic __sync_val_compare_and_swap too.
For 128bits memory access, we rely on SSE2 instructions.

By default, atomic accesses are non atomic if a single cpu is running,
so we force creation of a second one by creating a new thread first.

load/store helpers code path can't be triggered easily in user mode (no
softmmu), so we can't test it here.

Can be run with:
make -C build/tests/tcg/x86_64-linux-user run-plugin-test-plugin-mem-access-with-libmem.so

Tested-by: Xingtao Yao <yaoxt.fnst@fujitsu.com>
Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
---
 tests/tcg/x86_64/test-plugin-mem-access.c   | 89 +++++++++++++++++++++
 tests/tcg/x86_64/Makefile.target            |  7 ++
 tests/tcg/x86_64/check-plugin-mem-access.sh | 48 +++++++++++
 3 files changed, 144 insertions(+)
 create mode 100644 tests/tcg/x86_64/test-plugin-mem-access.c
 create mode 100755 tests/tcg/x86_64/check-plugin-mem-access.sh

Comments

Richard Henderson July 7, 2024, 6:25 p.m. UTC | #1
On 7/6/24 12:13, Pierrick Bouvier wrote:
> +++ b/tests/tcg/x86_64/test-plugin-mem-access.c
> @@ -0,0 +1,89 @@
> +#include <emmintrin.h>
> +#include <pthread.h>

All new files should have license boilerplate and description.
You can use spdx to limit to just a couple of lines.


r~
Alex Bennée July 8, 2024, 7:15 p.m. UTC | #2
Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:

> Add an explicit test to check expected memory values are read/written.
> For sizes 8, 16, 32, 64 and 128, we generate a load/store operation.
> For size 8 -> 64, we generate an atomic __sync_val_compare_and_swap too.
> For 128bits memory access, we rely on SSE2 instructions.
>
> By default, atomic accesses are non atomic if a single cpu is running,
> so we force creation of a second one by creating a new thread first.
>
> load/store helpers code path can't be triggered easily in user mode (no
> softmmu), so we can't test it here.
>
> Can be run with:
> make -C build/tests/tcg/x86_64-linux-user run-plugin-test-plugin-mem-access-with-libmem.so
>
> Tested-by: Xingtao Yao <yaoxt.fnst@fujitsu.com>
> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> ---
>  tests/tcg/x86_64/test-plugin-mem-access.c   | 89 +++++++++++++++++++++
>  tests/tcg/x86_64/Makefile.target            |  7 ++
>  tests/tcg/x86_64/check-plugin-mem-access.sh | 48 +++++++++++
>  3 files changed, 144 insertions(+)
>  create mode 100644 tests/tcg/x86_64/test-plugin-mem-access.c
>  create mode 100755 tests/tcg/x86_64/check-plugin-mem-access.sh
>
> diff --git a/tests/tcg/x86_64/test-plugin-mem-access.c b/tests/tcg/x86_64/test-plugin-mem-access.c
> new file mode 100644
> index 00000000000..7fdd6a55829
> --- /dev/null
> +++ b/tests/tcg/x86_64/test-plugin-mem-access.c
> @@ -0,0 +1,89 @@
> +#include <emmintrin.h>
> +#include <pthread.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +
> +static void *data;
> +
> +#define DEFINE_STORE(name, type, value) \
> +static void store_##name(void)          \
> +{                                       \
> +    *((type *)data) = value;            \
> +}
> +
> +#define DEFINE_ATOMIC_OP(name, type, value)                 \
> +static void atomic_op_##name(void)                          \
> +{                                                           \
> +    *((type *)data) = 0x42;                                 \
> +    __sync_val_compare_and_swap((type *)data, 0x42, value); \

Should we exercise the other compare and swap ops? Do they all come
through the same rwm path?

> +}
> +
> +#define DEFINE_LOAD(name, type)                         \
> +static void load_##name(void)                           \
> +{                                                       \
> +    register type var asm("eax") = *((type *) data);    \
> +    (void)var;                                          \

This is a bit weird. It's the only inline asm needed that makes this a
non-multiarch test. Why?

> +}
> +
> +DEFINE_STORE(u8, uint8_t, 0xf1)
> +DEFINE_ATOMIC_OP(u8, uint8_t, 0xf1)
> +DEFINE_LOAD(u8, uint8_t)
> +DEFINE_STORE(u16, uint16_t, 0xf123)
> +DEFINE_ATOMIC_OP(u16, uint16_t, 0xf123)
> +DEFINE_LOAD(u16, uint16_t)
> +DEFINE_STORE(u32, uint32_t, 0xff112233)
> +DEFINE_ATOMIC_OP(u32, uint32_t, 0xff112233)
> +DEFINE_LOAD(u32, uint32_t)
> +DEFINE_STORE(u64, uint64_t, 0xf123456789abcdef)
> +DEFINE_ATOMIC_OP(u64, uint64_t, 0xf123456789abcdef)
> +DEFINE_LOAD(u64, uint64_t)
> +
> +static void store_u128(void)
> +{
> +    _mm_store_si128(data, _mm_set_epi32(0xf1223344, 0x55667788,
> +                                        0xf1234567, 0x89abcdef));
> +}
> +
> +static void load_u128(void)
> +{
> +    __m128i var = _mm_load_si128(data);
> +    (void)var;
> +}
> +
> +static void *f(void *p)
> +{
> +    return NULL;
> +}
> +
> +int main(void)
> +{
> +    /*
> +     * We force creation of a second thread to enable cpu flag CF_PARALLEL.
> +     * This will generate atomic operations when needed.
> +     */
> +    pthread_t thread;
> +    pthread_create(&thread, NULL, &f, NULL);
> +    pthread_join(thread, NULL);
> +
> +    data = malloc(sizeof(__m128i));
> +    atomic_op_u8();
> +    store_u8();
> +    load_u8();
> +
> +    atomic_op_u16();
> +    store_u16();
> +    load_u16();
> +
> +    atomic_op_u32();
> +    store_u32();
> +    load_u32();
> +
> +    atomic_op_u64();
> +    store_u64();
> +    load_u64();
> +
> +    store_u128();
> +    load_u128();
> +
> +    free(data);
> +}
> diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target
> index eda9bd7396c..3edc29b924d 100644
> --- a/tests/tcg/x86_64/Makefile.target
> +++ b/tests/tcg/x86_64/Makefile.target
> @@ -16,6 +16,7 @@ X86_64_TESTS += noexec
>  X86_64_TESTS += cmpxchg
>  X86_64_TESTS += adox
>  X86_64_TESTS += test-1648
> +PLUGINS_TESTS += test-plugin-mem-access
>  TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64
>  else
>  TESTS=$(MULTIARCH_TESTS)
> @@ -26,6 +27,12 @@ adox: CFLAGS=-O2
>  run-test-i386-ssse3: QEMU_OPTS += -cpu max
>  run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max
>  
> +run-plugin-test-plugin-mem-access-with-libmem.so: \
> +	PLUGIN_ARGS=$(COMMA)print-accesses=true
> +run-plugin-test-plugin-mem-access-with-libmem.so: \
> +	CHECK_PLUGIN_OUTPUT_COMMAND= \
> +	$(SRC_PATH)/tests/tcg/x86_64/check-plugin-mem-access.sh
> +
>  test-x86_64: LDFLAGS+=-lm -lc
>  test-x86_64: test-i386.c test-i386.h test-i386-shift.h test-i386-muldiv.h
>  	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
> diff --git a/tests/tcg/x86_64/check-plugin-mem-access.sh b/tests/tcg/x86_64/check-plugin-mem-access.sh
> new file mode 100755
> index 00000000000..163f1cfad34
> --- /dev/null
> +++ b/tests/tcg/x86_64/check-plugin-mem-access.sh
> @@ -0,0 +1,48 @@
> +#!/usr/bin/env bash
> +
> +set -euo pipefail
> +
> +die()
> +{
> +    echo "$@" 1>&2
> +    exit 1
> +}
> +
> +check()
> +{
> +    file=$1
> +    pattern=$2
> +    grep "$pattern" "$file" > /dev/null || die "\"$pattern\" not found in $file"
> +}
> +
> +[ $# -eq 1 ] || die "usage: plugin_out_file"
> +
> +plugin_out=$1
> +
> +expected()
> +{
> +    cat << EOF
> +,store_u8,.*,8,store,0xf1
> +,atomic_op_u8,.*,8,load,0x42
> +,atomic_op_u8,.*,8,store,0xf1
> +,load_u8,.*,8,load,0xf1
> +,store_u16,.*,16,store,0xf123
> +,atomic_op_u16,.*,16,load,0x0042
> +,atomic_op_u16,.*,16,store,0xf123
> +,load_u16,.*,16,load,0xf123
> +,store_u32,.*,32,store,0xff112233
> +,atomic_op_u32,.*,32,load,0x00000042
> +,atomic_op_u32,.*,32,store,0xff112233
> +,load_u32,.*,32,load,0xff112233
> +,store_u64,.*,64,store,0xf123456789abcdef
> +,atomic_op_u64,.*,64,load,0x0000000000000042
> +,atomic_op_u64,.*,64,store,0xf123456789abcdef
> +,load_u64,.*,64,load,0xf123456789abcdef
> +,store_u128,.*,128,store,0xf122334455667788f123456789abcdef
> +,load_u128,.*,128,load,0xf122334455667788f123456789abcdef
> +EOF
> +}
> +
> +expected | while read line; do
> +    check "$plugin_out" "$line"
> +done
Pierrick Bouvier July 12, 2024, 12:41 a.m. UTC | #3
On 7/7/24 11:25, Richard Henderson wrote:
> On 7/6/24 12:13, Pierrick Bouvier wrote:
>> +++ b/tests/tcg/x86_64/test-plugin-mem-access.c
>> @@ -0,0 +1,89 @@
>> +#include <emmintrin.h>
>> +#include <pthread.h>
> 
> All new files should have license boilerplate and description.
> You can use spdx to limit to just a couple of lines.
> 

Added this.

> 
> r~
> 
>
Pierrick Bouvier July 12, 2024, 12:48 a.m. UTC | #4
On 7/8/24 12:15, Alex Bennée wrote:
> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
> 
>> Add an explicit test to check expected memory values are read/written.
>> For sizes 8, 16, 32, 64 and 128, we generate a load/store operation.
>> For size 8 -> 64, we generate an atomic __sync_val_compare_and_swap too.
>> For 128bits memory access, we rely on SSE2 instructions.
>>
>> By default, atomic accesses are non atomic if a single cpu is running,
>> so we force creation of a second one by creating a new thread first.
>>
>> load/store helpers code path can't be triggered easily in user mode (no
>> softmmu), so we can't test it here.
>>
>> Can be run with:
>> make -C build/tests/tcg/x86_64-linux-user run-plugin-test-plugin-mem-access-with-libmem.so
>>
>> Tested-by: Xingtao Yao <yaoxt.fnst@fujitsu.com>
>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>> ---
>>   tests/tcg/x86_64/test-plugin-mem-access.c   | 89 +++++++++++++++++++++
>>   tests/tcg/x86_64/Makefile.target            |  7 ++
>>   tests/tcg/x86_64/check-plugin-mem-access.sh | 48 +++++++++++
>>   3 files changed, 144 insertions(+)
>>   create mode 100644 tests/tcg/x86_64/test-plugin-mem-access.c
>>   create mode 100755 tests/tcg/x86_64/check-plugin-mem-access.sh
>>
>> diff --git a/tests/tcg/x86_64/test-plugin-mem-access.c b/tests/tcg/x86_64/test-plugin-mem-access.c
>> new file mode 100644
>> index 00000000000..7fdd6a55829
>> --- /dev/null
>> +++ b/tests/tcg/x86_64/test-plugin-mem-access.c
>> @@ -0,0 +1,89 @@
>> +#include <emmintrin.h>
>> +#include <pthread.h>
>> +#include <stdint.h>
>> +#include <stdlib.h>
>> +
>> +static void *data;
>> +
>> +#define DEFINE_STORE(name, type, value) \
>> +static void store_##name(void)          \
>> +{                                       \
>> +    *((type *)data) = value;            \
>> +}
>> +
>> +#define DEFINE_ATOMIC_OP(name, type, value)                 \
>> +static void atomic_op_##name(void)                          \
>> +{                                                           \
>> +    *((type *)data) = 0x42;                                 \
>> +    __sync_val_compare_and_swap((type *)data, 0x42, value); \
> 
> Should we exercise the other compare and swap ops? Do they all come
> through the same rwm path?
> 

There are definitely several paths depending on the generated atomic op.
However, the code is pretty straightforward (and implemented in a single 
function), so my thought was that one way to trigger this was enough.

>> +}
>> +
>> +#define DEFINE_LOAD(name, type)                         \
>> +static void load_##name(void)                           \
>> +{                                                       \
>> +    register type var asm("eax") = *((type *) data);    \
>> +    (void)var;                                          \
> 
> This is a bit weird. It's the only inline asm needed that makes this a
> non-multiarch test. Why?
> 

I'll answer here about why this test is arch specific, and not a multi arch.

The problem I met is that all target architecture do not have native 
64/128 bits types, and depending how code is compiled with gcc, you may 
or not generated expected vector instructions for 128bits operation. 
Same for atomic operations.

Thus, I chose to specialize this test for x86_64, and use sse2 directly 
for 128 bits integers.

You might say "How about adding ifdef for this". Yes sure, but the check 
script would become complicated too, and I just wanted to keep it 
simple. Our interest here is not to check that memory accesses are 
correctly implemented in QEMU, but to check that a specific behavior on 
one arch is the one expected.

Does it make more sense for you?

>> +}
>> +
>> +DEFINE_STORE(u8, uint8_t, 0xf1)
>> +DEFINE_ATOMIC_OP(u8, uint8_t, 0xf1)
>> +DEFINE_LOAD(u8, uint8_t)
>> +DEFINE_STORE(u16, uint16_t, 0xf123)
>> +DEFINE_ATOMIC_OP(u16, uint16_t, 0xf123)
>> +DEFINE_LOAD(u16, uint16_t)
>> +DEFINE_STORE(u32, uint32_t, 0xff112233)
>> +DEFINE_ATOMIC_OP(u32, uint32_t, 0xff112233)
>> +DEFINE_LOAD(u32, uint32_t)
>> +DEFINE_STORE(u64, uint64_t, 0xf123456789abcdef)
>> +DEFINE_ATOMIC_OP(u64, uint64_t, 0xf123456789abcdef)
>> +DEFINE_LOAD(u64, uint64_t)
>> +
>> +static void store_u128(void)
>> +{
>> +    _mm_store_si128(data, _mm_set_epi32(0xf1223344, 0x55667788,
>> +                                        0xf1234567, 0x89abcdef));
>> +}
>> +
>> +static void load_u128(void)
>> +{
>> +    __m128i var = _mm_load_si128(data);
>> +    (void)var;
>> +}
>> +
>> +static void *f(void *p)
>> +{
>> +    return NULL;
>> +}
>> +
>> +int main(void)
>> +{
>> +    /*
>> +     * We force creation of a second thread to enable cpu flag CF_PARALLEL.
>> +     * This will generate atomic operations when needed.
>> +     */
>> +    pthread_t thread;
>> +    pthread_create(&thread, NULL, &f, NULL);
>> +    pthread_join(thread, NULL);
>> +
>> +    data = malloc(sizeof(__m128i));
>> +    atomic_op_u8();
>> +    store_u8();
>> +    load_u8();
>> +
>> +    atomic_op_u16();
>> +    store_u16();
>> +    load_u16();
>> +
>> +    atomic_op_u32();
>> +    store_u32();
>> +    load_u32();
>> +
>> +    atomic_op_u64();
>> +    store_u64();
>> +    load_u64();
>> +
>> +    store_u128();
>> +    load_u128();
>> +
>> +    free(data);
>> +}
>> diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target
>> index eda9bd7396c..3edc29b924d 100644
>> --- a/tests/tcg/x86_64/Makefile.target
>> +++ b/tests/tcg/x86_64/Makefile.target
>> @@ -16,6 +16,7 @@ X86_64_TESTS += noexec
>>   X86_64_TESTS += cmpxchg
>>   X86_64_TESTS += adox
>>   X86_64_TESTS += test-1648
>> +PLUGINS_TESTS += test-plugin-mem-access
>>   TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64
>>   else
>>   TESTS=$(MULTIARCH_TESTS)
>> @@ -26,6 +27,12 @@ adox: CFLAGS=-O2
>>   run-test-i386-ssse3: QEMU_OPTS += -cpu max
>>   run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max
>>   
>> +run-plugin-test-plugin-mem-access-with-libmem.so: \
>> +	PLUGIN_ARGS=$(COMMA)print-accesses=true
>> +run-plugin-test-plugin-mem-access-with-libmem.so: \
>> +	CHECK_PLUGIN_OUTPUT_COMMAND= \
>> +	$(SRC_PATH)/tests/tcg/x86_64/check-plugin-mem-access.sh
>> +
>>   test-x86_64: LDFLAGS+=-lm -lc
>>   test-x86_64: test-i386.c test-i386.h test-i386-shift.h test-i386-muldiv.h
>>   	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
>> diff --git a/tests/tcg/x86_64/check-plugin-mem-access.sh b/tests/tcg/x86_64/check-plugin-mem-access.sh
>> new file mode 100755
>> index 00000000000..163f1cfad34
>> --- /dev/null
>> +++ b/tests/tcg/x86_64/check-plugin-mem-access.sh
>> @@ -0,0 +1,48 @@
>> +#!/usr/bin/env bash
>> +
>> +set -euo pipefail
>> +
>> +die()
>> +{
>> +    echo "$@" 1>&2
>> +    exit 1
>> +}
>> +
>> +check()
>> +{
>> +    file=$1
>> +    pattern=$2
>> +    grep "$pattern" "$file" > /dev/null || die "\"$pattern\" not found in $file"
>> +}
>> +
>> +[ $# -eq 1 ] || die "usage: plugin_out_file"
>> +
>> +plugin_out=$1
>> +
>> +expected()
>> +{
>> +    cat << EOF
>> +,store_u8,.*,8,store,0xf1
>> +,atomic_op_u8,.*,8,load,0x42
>> +,atomic_op_u8,.*,8,store,0xf1
>> +,load_u8,.*,8,load,0xf1
>> +,store_u16,.*,16,store,0xf123
>> +,atomic_op_u16,.*,16,load,0x0042
>> +,atomic_op_u16,.*,16,store,0xf123
>> +,load_u16,.*,16,load,0xf123
>> +,store_u32,.*,32,store,0xff112233
>> +,atomic_op_u32,.*,32,load,0x00000042
>> +,atomic_op_u32,.*,32,store,0xff112233
>> +,load_u32,.*,32,load,0xff112233
>> +,store_u64,.*,64,store,0xf123456789abcdef
>> +,atomic_op_u64,.*,64,load,0x0000000000000042
>> +,atomic_op_u64,.*,64,store,0xf123456789abcdef
>> +,load_u64,.*,64,load,0xf123456789abcdef
>> +,store_u128,.*,128,store,0xf122334455667788f123456789abcdef
>> +,load_u128,.*,128,load,0xf122334455667788f123456789abcdef
>> +EOF
>> +}
>> +
>> +expected | while read line; do
>> +    check "$plugin_out" "$line"
>> +done
>
Alex Bennée July 12, 2024, 2:51 p.m. UTC | #5
Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:

> On 7/8/24 12:15, Alex Bennée wrote:
>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
>> 
>>> Add an explicit test to check expected memory values are read/written.
>>> For sizes 8, 16, 32, 64 and 128, we generate a load/store operation.
>>> For size 8 -> 64, we generate an atomic __sync_val_compare_and_swap too.
>>> For 128bits memory access, we rely on SSE2 instructions.
>>>
>>> By default, atomic accesses are non atomic if a single cpu is running,
>>> so we force creation of a second one by creating a new thread first.
>>>
>>> load/store helpers code path can't be triggered easily in user mode (no
>>> softmmu), so we can't test it here.
>>>
>>> Can be run with:
>>> make -C build/tests/tcg/x86_64-linux-user run-plugin-test-plugin-mem-access-with-libmem.so
>>>
>>> Tested-by: Xingtao Yao <yaoxt.fnst@fujitsu.com>
>>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>> ---
>>>   tests/tcg/x86_64/test-plugin-mem-access.c   | 89 +++++++++++++++++++++
>>>   tests/tcg/x86_64/Makefile.target            |  7 ++
>>>   tests/tcg/x86_64/check-plugin-mem-access.sh | 48 +++++++++++
>>>   3 files changed, 144 insertions(+)
>>>   create mode 100644 tests/tcg/x86_64/test-plugin-mem-access.c
>>>   create mode 100755 tests/tcg/x86_64/check-plugin-mem-access.sh
>>>
>>> diff --git a/tests/tcg/x86_64/test-plugin-mem-access.c b/tests/tcg/x86_64/test-plugin-mem-access.c
>>> new file mode 100644
>>> index 00000000000..7fdd6a55829
>>> --- /dev/null
>>> +++ b/tests/tcg/x86_64/test-plugin-mem-access.c
>>> @@ -0,0 +1,89 @@
>>> +#include <emmintrin.h>
>>> +#include <pthread.h>
>>> +#include <stdint.h>
>>> +#include <stdlib.h>
>>> +
>>> +static void *data;
>>> +
>>> +#define DEFINE_STORE(name, type, value) \
>>> +static void store_##name(void)          \
>>> +{                                       \
>>> +    *((type *)data) = value;            \
>>> +}
>>> +
>>> +#define DEFINE_ATOMIC_OP(name, type, value)                 \
>>> +static void atomic_op_##name(void)                          \
>>> +{                                                           \
>>> +    *((type *)data) = 0x42;                                 \
>>> +    __sync_val_compare_and_swap((type *)data, 0x42, value); \
>> Should we exercise the other compare and swap ops? Do they all come
>> through the same rwm path?
>> 
>
> There are definitely several paths depending on the generated atomic op.
> However, the code is pretty straightforward (and implemented in a
> single function), so my thought was that one way to trigger this was
> enough.

If they all come through the same path I guess that's OK.

>>> +}
>>> +
>>> +#define DEFINE_LOAD(name, type)                         \
>>> +static void load_##name(void)                           \
>>> +{                                                       \
>>> +    register type var asm("eax") = *((type *) data);    \
>>> +    (void)var;                                          \
>> This is a bit weird. It's the only inline asm needed that makes this
>> a
>> non-multiarch test. Why?
>> 
>
> I'll answer here about why this test is arch specific, and not a multi arch.
>
> The problem I met is that all target architecture do not have native
> 64/128 bits types, and depending how code is compiled with gcc, you
> may or not generated expected vector instructions for 128bits
> operation. Same for atomic operations.

So we do handle this with the sha512 test, building variants of it with
various compiler flags to trigger the use of vectors. So the code is
multiarch but we have arch specific variants as dictated by the
Makefiles, i.e.:

  sha512-sve: CFLAGS=-O3 -march=armv8.1-a+sve
  sha512-sve: sha512.c
          $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)

  TESTS += sha512-sve


> Thus, I chose to specialize this test for x86_64, and use sse2
> directly for 128 bits integers.
>
> You might say "How about adding ifdef for this". Yes sure, but the
> check script would become complicated too, and I just wanted to keep
> it simple.

We can keep the check-script per arch if we have to.

> Our interest here is not to check that memory accesses are
> correctly implemented in QEMU, but to check that a specific behavior
> on one arch is the one expected.

So the problem with not being multiarch is we don't build all targets in
all combinations. To limit CI time we often build a subset and now this
particular subset won't test the plugin paths.

>
> Does it make more sense for you?
>
<snip>
Pierrick Bouvier July 12, 2024, 5:14 p.m. UTC | #6
On 7/12/24 07:51, Alex Bennée wrote:
> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
> 
>> On 7/8/24 12:15, Alex Bennée wrote:
>>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
>>>
>>>> Add an explicit test to check expected memory values are read/written.
>>>> For sizes 8, 16, 32, 64 and 128, we generate a load/store operation.
>>>> For size 8 -> 64, we generate an atomic __sync_val_compare_and_swap too.
>>>> For 128bits memory access, we rely on SSE2 instructions.
>>>>
>>>> By default, atomic accesses are non atomic if a single cpu is running,
>>>> so we force creation of a second one by creating a new thread first.
>>>>
>>>> load/store helpers code path can't be triggered easily in user mode (no
>>>> softmmu), so we can't test it here.
>>>>
>>>> Can be run with:
>>>> make -C build/tests/tcg/x86_64-linux-user run-plugin-test-plugin-mem-access-with-libmem.so
>>>>
>>>> Tested-by: Xingtao Yao <yaoxt.fnst@fujitsu.com>
>>>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>>> ---
>>>>    tests/tcg/x86_64/test-plugin-mem-access.c   | 89 +++++++++++++++++++++
>>>>    tests/tcg/x86_64/Makefile.target            |  7 ++
>>>>    tests/tcg/x86_64/check-plugin-mem-access.sh | 48 +++++++++++
>>>>    3 files changed, 144 insertions(+)
>>>>    create mode 100644 tests/tcg/x86_64/test-plugin-mem-access.c
>>>>    create mode 100755 tests/tcg/x86_64/check-plugin-mem-access.sh
>>>>
>>>> diff --git a/tests/tcg/x86_64/test-plugin-mem-access.c b/tests/tcg/x86_64/test-plugin-mem-access.c
>>>> new file mode 100644
>>>> index 00000000000..7fdd6a55829
>>>> --- /dev/null
>>>> +++ b/tests/tcg/x86_64/test-plugin-mem-access.c
>>>> @@ -0,0 +1,89 @@
>>>> +#include <emmintrin.h>
>>>> +#include <pthread.h>
>>>> +#include <stdint.h>
>>>> +#include <stdlib.h>
>>>> +
>>>> +static void *data;
>>>> +
>>>> +#define DEFINE_STORE(name, type, value) \
>>>> +static void store_##name(void)          \
>>>> +{                                       \
>>>> +    *((type *)data) = value;            \
>>>> +}
>>>> +
>>>> +#define DEFINE_ATOMIC_OP(name, type, value)                 \
>>>> +static void atomic_op_##name(void)                          \
>>>> +{                                                           \
>>>> +    *((type *)data) = 0x42;                                 \
>>>> +    __sync_val_compare_and_swap((type *)data, 0x42, value); \
>>> Should we exercise the other compare and swap ops? Do they all come
>>> through the same rwm path?
>>>
>>
>> There are definitely several paths depending on the generated atomic op.
>> However, the code is pretty straightforward (and implemented in a
>> single function), so my thought was that one way to trigger this was
>> enough.
> 
> If they all come through the same path I guess that's OK.
> 
>>>> +}
>>>> +
>>>> +#define DEFINE_LOAD(name, type)                         \
>>>> +static void load_##name(void)                           \
>>>> +{                                                       \
>>>> +    register type var asm("eax") = *((type *) data);    \
>>>> +    (void)var;                                          \
>>> This is a bit weird. It's the only inline asm needed that makes this
>>> a
>>> non-multiarch test. Why?
>>>
>>
>> I'll answer here about why this test is arch specific, and not a multi arch.
>>
>> The problem I met is that all target architecture do not have native
>> 64/128 bits types, and depending how code is compiled with gcc, you
>> may or not generated expected vector instructions for 128bits
>> operation. Same for atomic operations.
> 
> So we do handle this with the sha512 test, building variants of it with
> various compiler flags to trigger the use of vectors. So the code is
> multiarch but we have arch specific variants as dictated by the
> Makefiles, i.e.:
> 
>    sha512-sve: CFLAGS=-O3 -march=armv8.1-a+sve
>    sha512-sve: sha512.c
>            $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
> 
>    TESTS += sha512-sve
>

I suspect this is gonna need several iterations to try all arch, and see 
which ones have native 64/128 bits and which ones have atomic 
instructions. Is that really where we want to spend our (precious) time? 
I'm not convinced of the value of that. We try to test plugins 
implementation, not how QEMU handles memory accesses in general.

The specificity of this test, is what we don't test the correct output 
of a program, but we observe an expected behavior, via the plugins 
trace. So it's a bit different from sha512 example.

>> Thus, I chose to specialize this test for x86_64, and use sse2
>> directly for 128 bits integers.
>>
>> You might say "How about adding ifdef for this". Yes sure, but the
>> check script would become complicated too, and I just wanted to keep
>> it simple.
> 
> We can keep the check-script per arch if we have to.
> 

I would add a target arch param, but not duplicate this to be honest. 
Will be a pain to update if needed.
My goal was to replace this with LLVM filecheck in a following series.

>> Our interest here is not to check that memory accesses are
>> correctly implemented in QEMU, but to check that a specific behavior
>> on one arch is the one expected.
> 
> So the problem with not being multiarch is we don't build all targets in
> all combinations. To limit CI time we often build a subset and now this
> particular subset won't test the plugin paths.
> 

Ok. Is linux-user-x86_64 frequently skipped?
I could add support for aarch64 too, if you think it's worth it. I 
suspect we always have at least one of the two arch that is running in CI.

>>
>> Does it make more sense for you?
>>
> <snip>
>
Pierrick Bouvier July 19, 2024, 1:01 a.m. UTC | #7
On 7/12/24 10:14, Pierrick Bouvier wrote:
> On 7/12/24 07:51, Alex Bennée wrote:
>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
>>
>>> On 7/8/24 12:15, Alex Bennée wrote:
>>>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
>>>>
>>>>> Add an explicit test to check expected memory values are read/written.
>>>>> For sizes 8, 16, 32, 64 and 128, we generate a load/store operation.
>>>>> For size 8 -> 64, we generate an atomic __sync_val_compare_and_swap too.
>>>>> For 128bits memory access, we rely on SSE2 instructions.
>>>>>
>>>>> By default, atomic accesses are non atomic if a single cpu is running,
>>>>> so we force creation of a second one by creating a new thread first.
>>>>>
>>>>> load/store helpers code path can't be triggered easily in user mode (no
>>>>> softmmu), so we can't test it here.
>>>>>
>>>>> Can be run with:
>>>>> make -C build/tests/tcg/x86_64-linux-user run-plugin-test-plugin-mem-access-with-libmem.so
>>>>>
>>>>> Tested-by: Xingtao Yao <yaoxt.fnst@fujitsu.com>
>>>>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>>>> ---
>>>>>     tests/tcg/x86_64/test-plugin-mem-access.c   | 89 +++++++++++++++++++++
>>>>>     tests/tcg/x86_64/Makefile.target            |  7 ++
>>>>>     tests/tcg/x86_64/check-plugin-mem-access.sh | 48 +++++++++++
>>>>>     3 files changed, 144 insertions(+)
>>>>>     create mode 100644 tests/tcg/x86_64/test-plugin-mem-access.c
>>>>>     create mode 100755 tests/tcg/x86_64/check-plugin-mem-access.sh
>>>>>
>>>>> diff --git a/tests/tcg/x86_64/test-plugin-mem-access.c b/tests/tcg/x86_64/test-plugin-mem-access.c
>>>>> new file mode 100644
>>>>> index 00000000000..7fdd6a55829
>>>>> --- /dev/null
>>>>> +++ b/tests/tcg/x86_64/test-plugin-mem-access.c
>>>>> @@ -0,0 +1,89 @@
>>>>> +#include <emmintrin.h>
>>>>> +#include <pthread.h>
>>>>> +#include <stdint.h>
>>>>> +#include <stdlib.h>
>>>>> +
>>>>> +static void *data;
>>>>> +
>>>>> +#define DEFINE_STORE(name, type, value) \
>>>>> +static void store_##name(void)          \
>>>>> +{                                       \
>>>>> +    *((type *)data) = value;            \
>>>>> +}
>>>>> +
>>>>> +#define DEFINE_ATOMIC_OP(name, type, value)                 \
>>>>> +static void atomic_op_##name(void)                          \
>>>>> +{                                                           \
>>>>> +    *((type *)data) = 0x42;                                 \
>>>>> +    __sync_val_compare_and_swap((type *)data, 0x42, value); \
>>>> Should we exercise the other compare and swap ops? Do they all come
>>>> through the same rwm path?
>>>>
>>>
>>> There are definitely several paths depending on the generated atomic op.
>>> However, the code is pretty straightforward (and implemented in a
>>> single function), so my thought was that one way to trigger this was
>>> enough.
>>
>> If they all come through the same path I guess that's OK.
>>
>>>>> +}
>>>>> +
>>>>> +#define DEFINE_LOAD(name, type)                         \
>>>>> +static void load_##name(void)                           \
>>>>> +{                                                       \
>>>>> +    register type var asm("eax") = *((type *) data);    \
>>>>> +    (void)var;                                          \
>>>> This is a bit weird. It's the only inline asm needed that makes this
>>>> a
>>>> non-multiarch test. Why?
>>>>
>>>
>>> I'll answer here about why this test is arch specific, and not a multi arch.
>>>
>>> The problem I met is that all target architecture do not have native
>>> 64/128 bits types, and depending how code is compiled with gcc, you
>>> may or not generated expected vector instructions for 128bits
>>> operation. Same for atomic operations.
>>
>> So we do handle this with the sha512 test, building variants of it with
>> various compiler flags to trigger the use of vectors. So the code is
>> multiarch but we have arch specific variants as dictated by the
>> Makefiles, i.e.:
>>
>>     sha512-sve: CFLAGS=-O3 -march=armv8.1-a+sve
>>     sha512-sve: sha512.c
>>             $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
>>
>>     TESTS += sha512-sve
>>
> 
> I suspect this is gonna need several iterations to try all arch, and see
> which ones have native 64/128 bits and which ones have atomic
> instructions. Is that really where we want to spend our (precious) time?
> I'm not convinced of the value of that. We try to test plugins
> implementation, not how QEMU handles memory accesses in general.
> 
> The specificity of this test, is what we don't test the correct output
> of a program, but we observe an expected behavior, via the plugins
> trace. So it's a bit different from sha512 example.
> 
>>> Thus, I chose to specialize this test for x86_64, and use sse2
>>> directly for 128 bits integers.
>>>
>>> You might say "How about adding ifdef for this". Yes sure, but the
>>> check script would become complicated too, and I just wanted to keep
>>> it simple.
>>
>> We can keep the check-script per arch if we have to.
>>
> 
> I would add a target arch param, but not duplicate this to be honest.
> Will be a pain to update if needed.
> My goal was to replace this with LLVM filecheck in a following series.
> 
>>> Our interest here is not to check that memory accesses are
>>> correctly implemented in QEMU, but to check that a specific behavior
>>> on one arch is the one expected.
>>
>> So the problem with not being multiarch is we don't build all targets in
>> all combinations. To limit CI time we often build a subset and now this
>> particular subset won't test the plugin paths.
>>
> 
> Ok. Is linux-user-x86_64 frequently skipped?
> I could add support for aarch64 too, if you think it's worth it. I
> suspect we always have at least one of the two arch that is running in CI.
> 
>>>
>>> Does it make more sense for you?
>>>
>> <snip>
>>

After discussing with Alex, our goal will be to have a multiarch test. 
I'll do this for the next version.
diff mbox series

Patch

diff --git a/tests/tcg/x86_64/test-plugin-mem-access.c b/tests/tcg/x86_64/test-plugin-mem-access.c
new file mode 100644
index 00000000000..7fdd6a55829
--- /dev/null
+++ b/tests/tcg/x86_64/test-plugin-mem-access.c
@@ -0,0 +1,89 @@ 
+#include <emmintrin.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+static void *data;
+
+#define DEFINE_STORE(name, type, value) \
+static void store_##name(void)          \
+{                                       \
+    *((type *)data) = value;            \
+}
+
+#define DEFINE_ATOMIC_OP(name, type, value)                 \
+static void atomic_op_##name(void)                          \
+{                                                           \
+    *((type *)data) = 0x42;                                 \
+    __sync_val_compare_and_swap((type *)data, 0x42, value); \
+}
+
+#define DEFINE_LOAD(name, type)                         \
+static void load_##name(void)                           \
+{                                                       \
+    register type var asm("eax") = *((type *) data);    \
+    (void)var;                                          \
+}
+
+DEFINE_STORE(u8, uint8_t, 0xf1)
+DEFINE_ATOMIC_OP(u8, uint8_t, 0xf1)
+DEFINE_LOAD(u8, uint8_t)
+DEFINE_STORE(u16, uint16_t, 0xf123)
+DEFINE_ATOMIC_OP(u16, uint16_t, 0xf123)
+DEFINE_LOAD(u16, uint16_t)
+DEFINE_STORE(u32, uint32_t, 0xff112233)
+DEFINE_ATOMIC_OP(u32, uint32_t, 0xff112233)
+DEFINE_LOAD(u32, uint32_t)
+DEFINE_STORE(u64, uint64_t, 0xf123456789abcdef)
+DEFINE_ATOMIC_OP(u64, uint64_t, 0xf123456789abcdef)
+DEFINE_LOAD(u64, uint64_t)
+
+static void store_u128(void)
+{
+    _mm_store_si128(data, _mm_set_epi32(0xf1223344, 0x55667788,
+                                        0xf1234567, 0x89abcdef));
+}
+
+static void load_u128(void)
+{
+    __m128i var = _mm_load_si128(data);
+    (void)var;
+}
+
+static void *f(void *p)
+{
+    return NULL;
+}
+
+int main(void)
+{
+    /*
+     * We force creation of a second thread to enable cpu flag CF_PARALLEL.
+     * This will generate atomic operations when needed.
+     */
+    pthread_t thread;
+    pthread_create(&thread, NULL, &f, NULL);
+    pthread_join(thread, NULL);
+
+    data = malloc(sizeof(__m128i));
+    atomic_op_u8();
+    store_u8();
+    load_u8();
+
+    atomic_op_u16();
+    store_u16();
+    load_u16();
+
+    atomic_op_u32();
+    store_u32();
+    load_u32();
+
+    atomic_op_u64();
+    store_u64();
+    load_u64();
+
+    store_u128();
+    load_u128();
+
+    free(data);
+}
diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target
index eda9bd7396c..3edc29b924d 100644
--- a/tests/tcg/x86_64/Makefile.target
+++ b/tests/tcg/x86_64/Makefile.target
@@ -16,6 +16,7 @@  X86_64_TESTS += noexec
 X86_64_TESTS += cmpxchg
 X86_64_TESTS += adox
 X86_64_TESTS += test-1648
+PLUGINS_TESTS += test-plugin-mem-access
 TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64
 else
 TESTS=$(MULTIARCH_TESTS)
@@ -26,6 +27,12 @@  adox: CFLAGS=-O2
 run-test-i386-ssse3: QEMU_OPTS += -cpu max
 run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max
 
+run-plugin-test-plugin-mem-access-with-libmem.so: \
+	PLUGIN_ARGS=$(COMMA)print-accesses=true
+run-plugin-test-plugin-mem-access-with-libmem.so: \
+	CHECK_PLUGIN_OUTPUT_COMMAND= \
+	$(SRC_PATH)/tests/tcg/x86_64/check-plugin-mem-access.sh
+
 test-x86_64: LDFLAGS+=-lm -lc
 test-x86_64: test-i386.c test-i386.h test-i386-shift.h test-i386-muldiv.h
 	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
diff --git a/tests/tcg/x86_64/check-plugin-mem-access.sh b/tests/tcg/x86_64/check-plugin-mem-access.sh
new file mode 100755
index 00000000000..163f1cfad34
--- /dev/null
+++ b/tests/tcg/x86_64/check-plugin-mem-access.sh
@@ -0,0 +1,48 @@ 
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+die()
+{
+    echo "$@" 1>&2
+    exit 1
+}
+
+check()
+{
+    file=$1
+    pattern=$2
+    grep "$pattern" "$file" > /dev/null || die "\"$pattern\" not found in $file"
+}
+
+[ $# -eq 1 ] || die "usage: plugin_out_file"
+
+plugin_out=$1
+
+expected()
+{
+    cat << EOF
+,store_u8,.*,8,store,0xf1
+,atomic_op_u8,.*,8,load,0x42
+,atomic_op_u8,.*,8,store,0xf1
+,load_u8,.*,8,load,0xf1
+,store_u16,.*,16,store,0xf123
+,atomic_op_u16,.*,16,load,0x0042
+,atomic_op_u16,.*,16,store,0xf123
+,load_u16,.*,16,load,0xf123
+,store_u32,.*,32,store,0xff112233
+,atomic_op_u32,.*,32,load,0x00000042
+,atomic_op_u32,.*,32,store,0xff112233
+,load_u32,.*,32,load,0xff112233
+,store_u64,.*,64,store,0xf123456789abcdef
+,atomic_op_u64,.*,64,load,0x0000000000000042
+,atomic_op_u64,.*,64,store,0xf123456789abcdef
+,load_u64,.*,64,load,0xf123456789abcdef
+,store_u128,.*,128,store,0xf122334455667788f123456789abcdef
+,load_u128,.*,128,load,0xf122334455667788f123456789abcdef
+EOF
+}
+
+expected | while read line; do
+    check "$plugin_out" "$line"
+done