diff mbox series

hw/char/stm32l4x5_usart.c: Fix ACK and min access size

Message ID 20240902061944.526873-1-satur9nine@gmail.com
State New
Headers show
Series hw/char/stm32l4x5_usart.c: Fix ACK and min access size | expand

Commit Message

Jacob Abrams Sept. 2, 2024, 6:19 a.m. UTC
These changes allow the official STM32L4xx HAL UART driver to function
properly with the b-l475e-iot01a machine.

Modifying USART_CR1 TE bit should alter USART_ISR TEACK bit, and
likewise for RE and REACK bit.

USART registers may be accessed via 16-bit instructions.

Reseting USART_CR1 UE bit should restore ISR to default value.

Fixes: 87b77e6e01ca ("hw/char/stm32l4x5_usart: Enable serial read and write")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2540
Signed-off-by: Jacob Abrams <satur9nine@gmail.com>
---
 hw/char/stm32l4x5_usart.c          | 29 +++++++++++++++++++---
 tests/qtest/stm32l4x5_usart-test.c | 39 +++++++++++++++++++++++++++++-
 2 files changed, 64 insertions(+), 4 deletions(-)

Comments

Peter Maydell Sept. 6, 2024, 4:12 p.m. UTC | #1
On Mon, 2 Sept 2024 at 14:38, Jacob Abrams <satur9nine@gmail.com> wrote:
>
> These changes allow the official STM32L4xx HAL UART driver to function
> properly with the b-l475e-iot01a machine.
>
> Modifying USART_CR1 TE bit should alter USART_ISR TEACK bit, and
> likewise for RE and REACK bit.
>
> USART registers may be accessed via 16-bit instructions.
>
> Reseting USART_CR1 UE bit should restore ISR to default value.
>
> Fixes: 87b77e6e01ca ("hw/char/stm32l4x5_usart: Enable serial read and write")
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2540
> Signed-off-by: Jacob Abrams <satur9nine@gmail.com>

Thanks for this patch. I have one question below, and one
minor nit.

> ---
>  hw/char/stm32l4x5_usart.c          | 29 +++++++++++++++++++---
>  tests/qtest/stm32l4x5_usart-test.c | 39 +++++++++++++++++++++++++++++-
>  2 files changed, 64 insertions(+), 4 deletions(-)
>
> diff --git a/hw/char/stm32l4x5_usart.c b/hw/char/stm32l4x5_usart.c
> index fc5dcac0c4..859fc6236a 100644
> --- a/hw/char/stm32l4x5_usart.c
> +++ b/hw/char/stm32l4x5_usart.c
> @@ -154,6 +154,28 @@ REG32(RDR, 0x24)
>  REG32(TDR, 0x28)
>      FIELD(TDR, TDR, 0, 9)
>
> +#define ISR_RESET_VALUE (0x020000C0)
> +
> +static void stm32l4x5_update_isr(Stm32l4x5UsartBaseState *s)
> +{
> +    if (!(s->cr1 & R_CR1_UE_MASK)) {
> +        s->isr = ISR_RESET_VALUE;
> +        return;
> +    }
> +
> +    if (s->cr1 & R_CR1_TE_MASK) {
> +        s->isr |= R_ISR_TEACK_MASK;
> +    } else {
> +        s->isr &= ~R_ISR_TEACK_MASK;
> +    }
> +
> +    if (s->cr1 & R_CR1_RE_MASK) {
> +        s->isr |= R_ISR_REACK_MASK;
> +    } else {
> +        s->isr &= ~R_ISR_REACK_MASK;
> +    }
> +}

Should we be doing these things based on the value of
the CR1 bits (as this code does), or instead do them
when the bit changes from 0 to 1 (or 1 to 0)?
The wording in the datasheet seems unclear to me; my
impression is that hardware designers often like to
do these things on transitions rather than level based,
but of course you can design h/w both ways...

I guess it could be tested on real hardware:
 * write CR1.TE = 1
 * wait for ISR.TEACK = 1
 * write ISR.TEACK = 0
 * write CR1.TE = 1 (i.e. write again to the register
   without changing its value)
 * does ISR.TEACK go to 1 again, or does it stay 0?

> +
>  static void stm32l4x5_update_irq(Stm32l4x5UsartBaseState *s)
>  {
>      if (((s->isr & R_ISR_WUF_MASK) && (s->cr3 & R_CR3_WUFIE_MASK))        ||
> @@ -367,7 +389,7 @@ static void stm32l4x5_usart_base_reset_hold(Object *obj, ResetType type)
>      s->brr = 0x00000000;
>      s->gtpr = 0x00000000;
>      s->rtor = 0x00000000;
> -    s->isr = 0x020000C0;
> +    s->isr = ISR_RESET_VALUE;
>      s->rdr = 0x00000000;
>      s->tdr = 0x00000000;
>
> @@ -456,6 +478,7 @@ static void stm32l4x5_usart_base_write(void *opaque, hwaddr addr,
>      case A_CR1:
>          s->cr1 = value;
>          stm32l4x5_update_params(s);
> +        stm32l4x5_update_isr(s);
>          stm32l4x5_update_irq(s);
>          return;
>      case A_CR2:
> @@ -508,12 +531,12 @@ static const MemoryRegionOps stm32l4x5_usart_base_ops = {
>      .endianness = DEVICE_NATIVE_ENDIAN,
>      .valid = {
>          .max_access_size = 4,
> -        .min_access_size = 4,
> +        .min_access_size = 2,
>          .unaligned = false
>      },
>      .impl = {
>          .max_access_size = 4,
> -        .min_access_size = 4,
> +        .min_access_size = 2,
>          .unaligned = false
>      },

The effect of these is that a 16-bit write not aligned
to a (4-aligned) register offset will generate a GUEST_ERROR
logged message, and a 16-bit write aligned to a 4-aligned
register offset will write the value zero-extended to 32 bits.
That seems reasonable to me.

>  };
> diff --git a/tests/qtest/stm32l4x5_usart-test.c b/tests/qtest/stm32l4x5_usart-test.c
> index 8902518233..ef886074c0 100644
> --- a/tests/qtest/stm32l4x5_usart-test.c
> +++ b/tests/qtest/stm32l4x5_usart-test.c
> @@ -36,6 +36,8 @@ REG32(GTPR, 0x10)
>  REG32(RTOR, 0x14)
>  REG32(RQR, 0x18)
>  REG32(ISR, 0x1C)
> +    FIELD(ISR, REACK, 22, 1)
> +    FIELD(ISR, TEACK, 21, 1)
>      FIELD(ISR, TXE, 7, 1)
>      FIELD(ISR, RXNE, 5, 1)
>      FIELD(ISR, ORE, 3, 1)
> @@ -191,7 +193,7 @@ static void init_uart(QTestState *qts)
>
>      /* Enable the transmitter, the receiver and the USART. */
>      qtest_writel(qts, (USART1_BASE_ADDR + A_CR1),
> -        R_CR1_UE_MASK | R_CR1_RE_MASK | R_CR1_TE_MASK);
> +        cr1 | R_CR1_UE_MASK | R_CR1_RE_MASK | R_CR1_TE_MASK);
>  }
>
>  static void test_write_read(void)
> @@ -202,6 +204,11 @@ static void test_write_read(void)
>      qtest_writel(qts, USART1_BASE_ADDR + A_TDR, 0xFFFFFFFF);
>      const uint32_t tdr = qtest_readl(qts, USART1_BASE_ADDR + A_TDR);
>      g_assert_cmpuint(tdr, ==, 0x000001FF);
> +
> +    /* Official STM HAL uses uint16_t for TDR */
> +    qtest_writew(qts, USART1_BASE_ADDR + A_TDR, 0xFFFF);
> +    const uint16_t tdr16 = qtest_readw(qts, USART1_BASE_ADDR + A_TDR);
> +    g_assert_cmpuint(tdr16, ==, 0x000001FF);
>  }
>
>  static void test_receive_char(void)
> @@ -296,6 +303,35 @@ static void test_send_str(void)
>      qtest_quit(qts);
>  }
>
> +static void test_ack(void)
> +{
> +    uint32_t cr1;
> +    uint32_t isr;
> +    QTestState *qts = qtest_init("-M b-l475e-iot01a");
> +
> +    init_uart(qts);
> +
> +    cr1 = qtest_readl(qts, (USART1_BASE_ADDR + A_CR1));
> +
> +    /* Disable the transmitter and receiver. */
> +    qtest_writel(qts, (USART1_BASE_ADDR + A_CR1),
> +        cr1 & ~(R_CR1_RE_MASK | R_CR1_TE_MASK));
> +
> +    /* Test ISR ACK for transmitter and receiver disabled */
> +    isr = qtest_readl(qts, (USART1_BASE_ADDR + A_ISR));
> +    g_assert_false(isr & R_ISR_TEACK_MASK);
> +    g_assert_false(isr & R_ISR_REACK_MASK);
> +
> +    /* Enable the transmitter and receiver. */
> +    qtest_writel(qts, (USART1_BASE_ADDR + A_CR1),
> +        cr1 | (R_CR1_RE_MASK | R_CR1_TE_MASK));
> +
> +    /* Test ISR ACK for transmitter and receiver disabled */
> +    isr = qtest_readl(qts, (USART1_BASE_ADDR + A_ISR));
> +    g_assert_true(isr & R_ISR_TEACK_MASK);
> +    g_assert_true(isr & R_ISR_REACK_MASK);

This is missing a
       qtest_quit(qts);
at the end of the function. Without it, on non-Linux
hosts the QEMU process-under-tests will not be properly
killed. We were also missing one at the end of
test_write_read() in this file, which we just fixed
this week in commit d1e8bea9c9c186.

> +}
> +
>  int main(int argc, char **argv)
>  {
>      int ret;
> @@ -308,6 +344,7 @@ int main(int argc, char **argv)
>      qtest_add_func("stm32l4x5/usart/send_char", test_send_char);
>      qtest_add_func("stm32l4x5/usart/receive_str", test_receive_str);
>      qtest_add_func("stm32l4x5/usart/send_str", test_send_str);
> +    qtest_add_func("stm32l4x5/usart/ack", test_ack);
>      ret = g_test_run();
>
>      return ret;
> --
> 2.43.0

thanks
-- PMM
Jacob Abrams Sept. 7, 2024, 8:26 p.m. UTC | #2
On 9/6/24 09:12, Peter Maydell wrote:
> On Mon, 2 Sept 2024 at 14:38, Jacob Abrams <satur9nine@gmail.com> wrote:
>>
>> These changes allow the official STM32L4xx HAL UART driver to function
>> properly with the b-l475e-iot01a machine.
>>
>> Modifying USART_CR1 TE bit should alter USART_ISR TEACK bit, and
>> likewise for RE and REACK bit.
>>
>> USART registers may be accessed via 16-bit instructions.
>>
>> Reseting USART_CR1 UE bit should restore ISR to default value.
>>
>> Fixes: 87b77e6e01ca ("hw/char/stm32l4x5_usart: Enable serial read and write")
>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2540
>> Signed-off-by: Jacob Abrams <satur9nine@gmail.com>
> 
> Thanks for this patch. I have one question below, and one
> minor nit.
> 
>> ---
>>  hw/char/stm32l4x5_usart.c          | 29 +++++++++++++++++++---
>>  tests/qtest/stm32l4x5_usart-test.c | 39 +++++++++++++++++++++++++++++-
>>  2 files changed, 64 insertions(+), 4 deletions(-)
>>
>> diff --git a/hw/char/stm32l4x5_usart.c b/hw/char/stm32l4x5_usart.c
>> index fc5dcac0c4..859fc6236a 100644
>> --- a/hw/char/stm32l4x5_usart.c
>> +++ b/hw/char/stm32l4x5_usart.c
>> @@ -154,6 +154,28 @@ REG32(RDR, 0x24)
>>  REG32(TDR, 0x28)
>>      FIELD(TDR, TDR, 0, 9)
>>
>> +#define ISR_RESET_VALUE (0x020000C0)
>> +
>> +static void stm32l4x5_update_isr(Stm32l4x5UsartBaseState *s)
>> +{
>> +    if (!(s->cr1 & R_CR1_UE_MASK)) {
>> +        s->isr = ISR_RESET_VALUE;
>> +        return;
>> +    }
>> +
>> +    if (s->cr1 & R_CR1_TE_MASK) {
>> +        s->isr |= R_ISR_TEACK_MASK;
>> +    } else {
>> +        s->isr &= ~R_ISR_TEACK_MASK;
>> +    }
>> +
>> +    if (s->cr1 & R_CR1_RE_MASK) {
>> +        s->isr |= R_ISR_REACK_MASK;
>> +    } else {
>> +        s->isr &= ~R_ISR_REACK_MASK;
>> +    }
>> +}
> 
> Should we be doing these things based on the value of
> the CR1 bits (as this code does), or instead do them
> when the bit changes from 0 to 1 (or 1 to 0)?
> The wording in the datasheet seems unclear to me; my
> impression is that hardware designers often like to
> do these things on transitions rather than level based,
> but of course you can design h/w both ways...
> 
> I guess it could be tested on real hardware:
>  * write CR1.TE = 1
>  * wait for ISR.TEACK = 1
>  * write ISR.TEACK = 0
>  * write CR1.TE = 1 (i.e. write again to the register
>    without changing its value)
>  * does ISR.TEACK go to 1 again, or does it stay 0?
> 

Per RM0351 all bits in the ISR register are read-only by software, so we cannot test writing ISR.TEACK on a physical chip, the write would be ignored. Precise description in RM0351 is "This bit is set/reset by hardware, when the Transmit Enable value is taken into account by the USART." Based on this I think it is safe to assume that ISR.TEACK should always reflect the value of CR1.TE, though in real hardware it would take some microseconds for TEACK to be updated after TE is changed.

>> +
>>  static void stm32l4x5_update_irq(Stm32l4x5UsartBaseState *s)
>>  {
>>      if (((s->isr & R_ISR_WUF_MASK) && (s->cr3 & R_CR3_WUFIE_MASK))        ||
>> @@ -367,7 +389,7 @@ static void stm32l4x5_usart_base_reset_hold(Object *obj, ResetType type)
>>      s->brr = 0x00000000;
>>      s->gtpr = 0x00000000;
>>      s->rtor = 0x00000000;
>> -    s->isr = 0x020000C0;
>> +    s->isr = ISR_RESET_VALUE;
>>      s->rdr = 0x00000000;
>>      s->tdr = 0x00000000;
>>
>> @@ -456,6 +478,7 @@ static void stm32l4x5_usart_base_write(void *opaque, hwaddr addr,
>>      case A_CR1:
>>          s->cr1 = value;
>>          stm32l4x5_update_params(s);
>> +        stm32l4x5_update_isr(s);
>>          stm32l4x5_update_irq(s);
>>          return;
>>      case A_CR2:
>> @@ -508,12 +531,12 @@ static const MemoryRegionOps stm32l4x5_usart_base_ops = {
>>      .endianness = DEVICE_NATIVE_ENDIAN,
>>      .valid = {
>>          .max_access_size = 4,
>> -        .min_access_size = 4,
>> +        .min_access_size = 2,
>>          .unaligned = false
>>      },
>>      .impl = {
>>          .max_access_size = 4,
>> -        .min_access_size = 4,
>> +        .min_access_size = 2,
>>          .unaligned = false
>>      },
> 
> The effect of these is that a 16-bit write not aligned
> to a (4-aligned) register offset will generate a GUEST_ERROR
> logged message, and a 16-bit write aligned to a 4-aligned
> register offset will write the value zero-extended to 32 bits.
> That seems reasonable to me.
> 

OK sounds good

>>  };
>> diff --git a/tests/qtest/stm32l4x5_usart-test.c b/tests/qtest/stm32l4x5_usart-test.c
>> index 8902518233..ef886074c0 100644
>> --- a/tests/qtest/stm32l4x5_usart-test.c
>> +++ b/tests/qtest/stm32l4x5_usart-test.c
>> @@ -36,6 +36,8 @@ REG32(GTPR, 0x10)
>>  REG32(RTOR, 0x14)
>>  REG32(RQR, 0x18)
>>  REG32(ISR, 0x1C)
>> +    FIELD(ISR, REACK, 22, 1)
>> +    FIELD(ISR, TEACK, 21, 1)
>>      FIELD(ISR, TXE, 7, 1)
>>      FIELD(ISR, RXNE, 5, 1)
>>      FIELD(ISR, ORE, 3, 1)
>> @@ -191,7 +193,7 @@ static void init_uart(QTestState *qts)
>>
>>      /* Enable the transmitter, the receiver and the USART. */
>>      qtest_writel(qts, (USART1_BASE_ADDR + A_CR1),
>> -        R_CR1_UE_MASK | R_CR1_RE_MASK | R_CR1_TE_MASK);
>> +        cr1 | R_CR1_UE_MASK | R_CR1_RE_MASK | R_CR1_TE_MASK);
>>  }
>>
>>  static void test_write_read(void)
>> @@ -202,6 +204,11 @@ static void test_write_read(void)
>>      qtest_writel(qts, USART1_BASE_ADDR + A_TDR, 0xFFFFFFFF);
>>      const uint32_t tdr = qtest_readl(qts, USART1_BASE_ADDR + A_TDR);
>>      g_assert_cmpuint(tdr, ==, 0x000001FF);
>> +
>> +    /* Official STM HAL uses uint16_t for TDR */
>> +    qtest_writew(qts, USART1_BASE_ADDR + A_TDR, 0xFFFF);
>> +    const uint16_t tdr16 = qtest_readw(qts, USART1_BASE_ADDR + A_TDR);
>> +    g_assert_cmpuint(tdr16, ==, 0x000001FF);
>>  }
>>
>>  static void test_receive_char(void)
>> @@ -296,6 +303,35 @@ static void test_send_str(void)
>>      qtest_quit(qts);
>>  }
>>
>> +static void test_ack(void)
>> +{
>> +    uint32_t cr1;
>> +    uint32_t isr;
>> +    QTestState *qts = qtest_init("-M b-l475e-iot01a");
>> +
>> +    init_uart(qts);
>> +
>> +    cr1 = qtest_readl(qts, (USART1_BASE_ADDR + A_CR1));
>> +
>> +    /* Disable the transmitter and receiver. */
>> +    qtest_writel(qts, (USART1_BASE_ADDR + A_CR1),
>> +        cr1 & ~(R_CR1_RE_MASK | R_CR1_TE_MASK));
>> +
>> +    /* Test ISR ACK for transmitter and receiver disabled */
>> +    isr = qtest_readl(qts, (USART1_BASE_ADDR + A_ISR));
>> +    g_assert_false(isr & R_ISR_TEACK_MASK);
>> +    g_assert_false(isr & R_ISR_REACK_MASK);
>> +
>> +    /* Enable the transmitter and receiver. */
>> +    qtest_writel(qts, (USART1_BASE_ADDR + A_CR1),
>> +        cr1 | (R_CR1_RE_MASK | R_CR1_TE_MASK));
>> +
>> +    /* Test ISR ACK for transmitter and receiver disabled */
>> +    isr = qtest_readl(qts, (USART1_BASE_ADDR + A_ISR));
>> +    g_assert_true(isr & R_ISR_TEACK_MASK);
>> +    g_assert_true(isr & R_ISR_REACK_MASK);
> 
> This is missing a
>        qtest_quit(qts);
> at the end of the function. Without it, on non-Linux
> hosts the QEMU process-under-tests will not be properly
> killed. We were also missing one at the end of
> test_write_read() in this file, which we just fixed
> this week in commit d1e8bea9c9c186.
> 

Thanks I will submit a v2 patch with this corrected.

>> +}
>> +
>>  int main(int argc, char **argv)
>>  {
>>      int ret;
>> @@ -308,6 +344,7 @@ int main(int argc, char **argv)
>>      qtest_add_func("stm32l4x5/usart/send_char", test_send_char);
>>      qtest_add_func("stm32l4x5/usart/receive_str", test_receive_str);
>>      qtest_add_func("stm32l4x5/usart/send_str", test_send_str);
>> +    qtest_add_func("stm32l4x5/usart/ack", test_ack);
>>      ret = g_test_run();
>>
>>      return ret;
>> --
>> 2.43.0
> 
> thanks
> -- PMM

regards
-- Jacob Abrams
Philippe Mathieu-Daudé Sept. 9, 2024, 5:40 p.m. UTC | #3
Hi,

(Cc'ing Arnaud & Inès who are listed as maintainers)

On 6/9/24 18:12, Peter Maydell wrote:
> On Mon, 2 Sept 2024 at 14:38, Jacob Abrams <satur9nine@gmail.com> wrote:
>>
>> These changes allow the official STM32L4xx HAL UART driver to function
>> properly with the b-l475e-iot01a machine.
>>
>> Modifying USART_CR1 TE bit should alter USART_ISR TEACK bit, and
>> likewise for RE and REACK bit.
>>
>> USART registers may be accessed via 16-bit instructions.
>>
>> Reseting USART_CR1 UE bit should restore ISR to default value.
>>
>> Fixes: 87b77e6e01ca ("hw/char/stm32l4x5_usart: Enable serial read and write")
>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2540
>> Signed-off-by: Jacob Abrams <satur9nine@gmail.com>
> 
> Thanks for this patch. I have one question below, and one
> minor nit.
> 
>> ---
>>   hw/char/stm32l4x5_usart.c          | 29 +++++++++++++++++++---
>>   tests/qtest/stm32l4x5_usart-test.c | 39 +++++++++++++++++++++++++++++-
>>   2 files changed, 64 insertions(+), 4 deletions(-)


>>   static void stm32l4x5_update_irq(Stm32l4x5UsartBaseState *s)
>>   {
>>       if (((s->isr & R_ISR_WUF_MASK) && (s->cr3 & R_CR3_WUFIE_MASK))        ||
>> @@ -367,7 +389,7 @@ static void stm32l4x5_usart_base_reset_hold(Object *obj, ResetType type)
>>       s->brr = 0x00000000;
>>       s->gtpr = 0x00000000;
>>       s->rtor = 0x00000000;
>> -    s->isr = 0x020000C0;
>> +    s->isr = ISR_RESET_VALUE;
>>       s->rdr = 0x00000000;
>>       s->tdr = 0x00000000;
>>
>> @@ -456,6 +478,7 @@ static void stm32l4x5_usart_base_write(void *opaque, hwaddr addr,
>>       case A_CR1:
>>           s->cr1 = value;
>>           stm32l4x5_update_params(s);
>> +        stm32l4x5_update_isr(s);
>>           stm32l4x5_update_irq(s);
>>           return;
>>       case A_CR2:
>> @@ -508,12 +531,12 @@ static const MemoryRegionOps stm32l4x5_usart_base_ops = {
>>       .endianness = DEVICE_NATIVE_ENDIAN,
>>       .valid = {
>>           .max_access_size = 4,
>> -        .min_access_size = 4,
>> +        .min_access_size = 2,
>>           .unaligned = false
>>       },
>>       .impl = {
>>           .max_access_size = 4,
>> -        .min_access_size = 4,
>> +        .min_access_size = 2,
>>           .unaligned = false
>>       },
> 
> The effect of these is that a 16-bit write not aligned
> to a (4-aligned) register offset will generate a GUEST_ERROR
> logged message, and a 16-bit write aligned to a 4-aligned
> register offset will write the value zero-extended to 32 bits.
> That seems reasonable to me.

Peter, are you describing the .valid.min_access_size 4 -> 2 change
or the .impl.min_access_size one?

My understanding of the implementation is a 32-bit one:

   REG32(CR1, 0x00)

   struct Stm32l4x5UsartBaseState {
       ...
       uint32_t cr1;

   static void stm32l4x5_usart_base_write(void *opaque, hwaddr addr,
                                   uint64_t val64, unsigned int size)
   {
       ...
       switch (addr) {
       case A_CR1:
           s->cr1 = value;

Am I missing something?

Now, back to .valid.min_access_size, per the section "40.8 USART
registers" of the reference manual:

   The peripheral registers have to be accessed by words (32 bits).

So I don't get the "USART registers may be accessed via 16-bit
instructions." part of this patch.

Jacob, for clarity, can you split this patch in 3 distinct parts
(TE bit, UE bit, unaligned access) so this discussion doesn't delay
the part which are OK?

Thanks,

Phil.
Jacob Abrams Sept. 10, 2024, 4:34 a.m. UTC | #4
On 9/9/24 10:40, Philippe Mathieu-Daudé wrote:
> Hi,
> 
> (Cc'ing Arnaud & Inès who are listed as maintainers)
> 
> On 6/9/24 18:12, Peter Maydell wrote:
>> On Mon, 2 Sept 2024 at 14:38, Jacob Abrams <satur9nine@gmail.com> wrote:
>>>
>>> These changes allow the official STM32L4xx HAL UART driver to function
>>> properly with the b-l475e-iot01a machine.
>>>
>>> Modifying USART_CR1 TE bit should alter USART_ISR TEACK bit, and
>>> likewise for RE and REACK bit.
>>>
>>> USART registers may be accessed via 16-bit instructions.
>>>
>>> Reseting USART_CR1 UE bit should restore ISR to default value.
>>>
>>> Fixes: 87b77e6e01ca ("hw/char/stm32l4x5_usart: Enable serial read and write")
>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2540
>>> Signed-off-by: Jacob Abrams <satur9nine@gmail.com>
>>
>> Thanks for this patch. I have one question below, and one
>> minor nit.
>>
>>> ---
>>>   hw/char/stm32l4x5_usart.c          | 29 +++++++++++++++++++---
>>>   tests/qtest/stm32l4x5_usart-test.c | 39 +++++++++++++++++++++++++++++-
>>>   2 files changed, 64 insertions(+), 4 deletions(-)
> 
> 
>>>   static void stm32l4x5_update_irq(Stm32l4x5UsartBaseState *s)
>>>   {
>>>       if (((s->isr & R_ISR_WUF_MASK) && (s->cr3 & R_CR3_WUFIE_MASK))        ||
>>> @@ -367,7 +389,7 @@ static void stm32l4x5_usart_base_reset_hold(Object *obj, ResetType type)
>>>       s->brr = 0x00000000;
>>>       s->gtpr = 0x00000000;
>>>       s->rtor = 0x00000000;
>>> -    s->isr = 0x020000C0;
>>> +    s->isr = ISR_RESET_VALUE;
>>>       s->rdr = 0x00000000;
>>>       s->tdr = 0x00000000;
>>>
>>> @@ -456,6 +478,7 @@ static void stm32l4x5_usart_base_write(void *opaque, hwaddr addr,
>>>       case A_CR1:
>>>           s->cr1 = value;
>>>           stm32l4x5_update_params(s);
>>> +        stm32l4x5_update_isr(s);
>>>           stm32l4x5_update_irq(s);
>>>           return;
>>>       case A_CR2:
>>> @@ -508,12 +531,12 @@ static const MemoryRegionOps stm32l4x5_usart_base_ops = {
>>>       .endianness = DEVICE_NATIVE_ENDIAN,
>>>       .valid = {
>>>           .max_access_size = 4,
>>> -        .min_access_size = 4,
>>> +        .min_access_size = 2,
>>>           .unaligned = false
>>>       },
>>>       .impl = {
>>>           .max_access_size = 4,
>>> -        .min_access_size = 4,
>>> +        .min_access_size = 2,
>>>           .unaligned = false
>>>       },
>>
>> The effect of these is that a 16-bit write not aligned
>> to a (4-aligned) register offset will generate a GUEST_ERROR
>> logged message, and a 16-bit write aligned to a 4-aligned
>> register offset will write the value zero-extended to 32 bits.
>> That seems reasonable to me.
> 
> Peter, are you describing the .valid.min_access_size 4 -> 2 change
> or the .impl.min_access_size one?
> 
> My understanding of the implementation is a 32-bit one:
> 
>   REG32(CR1, 0x00)
> 
>   struct Stm32l4x5UsartBaseState {
>       ...
>       uint32_t cr1;
> 
>   static void stm32l4x5_usart_base_write(void *opaque, hwaddr addr,
>                                   uint64_t val64, unsigned int size)
>   {
>       ...
>       switch (addr) {
>       case A_CR1:
>           s->cr1 = value;
> 
> Am I missing something?
> 
> Now, back to .valid.min_access_size, per the section "40.8 USART
> registers" of the reference manual:
> 
>   The peripheral registers have to be accessed by words (32 bits).
> 
> So I don't get the "USART registers may be accessed via 16-bit
> instructions." part of this patch.
> 
> Jacob, for clarity, can you split this patch in 3 distinct parts
> (TE bit, UE bit, unaligned access) so this discussion doesn't delay
> the part which are OK?
> 
> Thanks,
> 
> Phil.

Hmm it does appear to be a documentation error in RM0351 section 40.8.

I have an STM32L476 board (chip is nearly identical to the STM32L475 minus the display module) which is functioning with the USART driver code provided by STM in their examples that uses 16-bit access for several of the USART registers, see:

https://github.com/STMicroelectronics/cmsis_device_l4/blob/a2530753e86dd326a75467d28feb92e2ba7d0df2/Include/stm32l475xx.h#L950

I have a contact at STM through my work that I can mention this to just to double check.

Ultimately my goal is to get an STM32CubeL4 example such as UART_Printf running as-is in Qemu and it was only while attempting to run such a project in Qemu that I ran into this problem, see:

https://github.com/STMicroelectronics/STM32CubeL4/tree/master/Projects/STM32L476G-EVAL/Examples/UART/UART_Printf

This project works on my HW dev board just fine.

Of course if you still desire I don't mind splitting this change into 3 parts.

Regards,
Jacob
Philippe Mathieu-Daudé Sept. 10, 2024, 6:28 a.m. UTC | #5
Hi Jacob,

On 10/9/24 06:34, Jacob Abrams wrote:
> On 9/9/24 10:40, Philippe Mathieu-Daudé wrote:
>> Hi,
>>
>> (Cc'ing Arnaud & Inès who are listed as maintainers)
>>
>> On 6/9/24 18:12, Peter Maydell wrote:
>>> On Mon, 2 Sept 2024 at 14:38, Jacob Abrams <satur9nine@gmail.com> wrote:
>>>>
>>>> These changes allow the official STM32L4xx HAL UART driver to function
>>>> properly with the b-l475e-iot01a machine.
>>>>
>>>> Modifying USART_CR1 TE bit should alter USART_ISR TEACK bit, and
>>>> likewise for RE and REACK bit.
>>>>
>>>> USART registers may be accessed via 16-bit instructions.
>>>>
>>>> Reseting USART_CR1 UE bit should restore ISR to default value.
>>>>
>>>> Fixes: 87b77e6e01ca ("hw/char/stm32l4x5_usart: Enable serial read and write")
>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2540
>>>> Signed-off-by: Jacob Abrams <satur9nine@gmail.com>
>>>
>>> Thanks for this patch. I have one question below, and one
>>> minor nit.
>>>
>>>> ---
>>>>    hw/char/stm32l4x5_usart.c          | 29 +++++++++++++++++++---
>>>>    tests/qtest/stm32l4x5_usart-test.c | 39 +++++++++++++++++++++++++++++-
>>>>    2 files changed, 64 insertions(+), 4 deletions(-)
>>
>>
>>>>    static void stm32l4x5_update_irq(Stm32l4x5UsartBaseState *s)
>>>>    {
>>>>        if (((s->isr & R_ISR_WUF_MASK) && (s->cr3 & R_CR3_WUFIE_MASK))        ||
>>>> @@ -367,7 +389,7 @@ static void stm32l4x5_usart_base_reset_hold(Object *obj, ResetType type)
>>>>        s->brr = 0x00000000;
>>>>        s->gtpr = 0x00000000;
>>>>        s->rtor = 0x00000000;
>>>> -    s->isr = 0x020000C0;
>>>> +    s->isr = ISR_RESET_VALUE;
>>>>        s->rdr = 0x00000000;
>>>>        s->tdr = 0x00000000;
>>>>
>>>> @@ -456,6 +478,7 @@ static void stm32l4x5_usart_base_write(void *opaque, hwaddr addr,
>>>>        case A_CR1:
>>>>            s->cr1 = value;
>>>>            stm32l4x5_update_params(s);
>>>> +        stm32l4x5_update_isr(s);
>>>>            stm32l4x5_update_irq(s);
>>>>            return;
>>>>        case A_CR2:
>>>> @@ -508,12 +531,12 @@ static const MemoryRegionOps stm32l4x5_usart_base_ops = {
>>>>        .endianness = DEVICE_NATIVE_ENDIAN,
>>>>        .valid = {
>>>>            .max_access_size = 4,
>>>> -        .min_access_size = 4,
>>>> +        .min_access_size = 2,
>>>>            .unaligned = false
>>>>        },
>>>>        .impl = {
>>>>            .max_access_size = 4,
>>>> -        .min_access_size = 4,
>>>> +        .min_access_size = 2,
>>>>            .unaligned = false
>>>>        },
>>>
>>> The effect of these is that a 16-bit write not aligned
>>> to a (4-aligned) register offset will generate a GUEST_ERROR
>>> logged message, and a 16-bit write aligned to a 4-aligned
>>> register offset will write the value zero-extended to 32 bits.
>>> That seems reasonable to me.
>>
>> Peter, are you describing the .valid.min_access_size 4 -> 2 change
>> or the .impl.min_access_size one?
>>
>> My understanding of the implementation is a 32-bit one:
>>
>>    REG32(CR1, 0x00)
>>
>>    struct Stm32l4x5UsartBaseState {
>>        ...
>>        uint32_t cr1;
>>
>>    static void stm32l4x5_usart_base_write(void *opaque, hwaddr addr,
>>                                    uint64_t val64, unsigned int size)
>>    {
>>        ...
>>        switch (addr) {
>>        case A_CR1:
>>            s->cr1 = value;
>>
>> Am I missing something?
>>
>> Now, back to .valid.min_access_size, per the section "40.8 USART
>> registers" of the reference manual:
>>
>>    The peripheral registers have to be accessed by words (32 bits).
>>
>> So I don't get the "USART registers may be accessed via 16-bit
>> instructions." part of this patch.
>>
>> Jacob, for clarity, can you split this patch in 3 distinct parts
>> (TE bit, UE bit, unaligned access) so this discussion doesn't delay
>> the part which are OK?
>>
>> Thanks,
>>
>> Phil.
> 
> Hmm it does appear to be a documentation error in RM0351 section 40.8.
> 
> I have an STM32L476 board (chip is nearly identical to the STM32L475 minus the display module) which is functioning with the USART driver code provided by STM in their examples that uses 16-bit access for several of the USART registers, see:
> 
> https://github.com/STMicroelectronics/cmsis_device_l4/blob/a2530753e86dd326a75467d28feb92e2ba7d0df2/Include/stm32l475xx.h#L950

OK, we need to mention that in a comment around, something like:

   /*
    * While the reference manual states "the peripheral registers
    * have to be accessed by words (32 bits)", code -- like the
    * STM32CubeL4 CMSIS Device MCU Component -- ran on real
    * hardware shows some registers can be accessed by 16 bits.
    */
   .valid.min_access_size = 2,

> I have a contact at STM through my work that I can mention this to just to double check.

Thanks, that is great.

> Ultimately my goal is to get an STM32CubeL4 example such as UART_Printf running as-is in Qemu and it was only while attempting to run such a project in Qemu that I ran into this problem, see:
> 
> https://github.com/STMicroelectronics/STM32CubeL4/tree/master/Projects/STM32L476G-EVAL/Examples/UART/UART_Printf

It would be nice to add a test using this file in our functional
test suite, see for example tests/functional/test_avr_mega2560.py.

> 
> This project works on my HW dev board just fine.
> 
> Of course if you still desire I don't mind splitting this change into 3 parts.

Well, you clearly list 3 steps in the commit description.
Besides, the discussions whether .imp.min_access_size = 2
is valid still stands.

Regards,

Phil.
Peter Maydell Sept. 10, 2024, 9:34 a.m. UTC | #6
On Mon, 9 Sept 2024 at 18:40, Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
>
> Hi,
>
> (Cc'ing Arnaud & Inès who are listed as maintainers)
>
> On 6/9/24 18:12, Peter Maydell wrote:
> > On Mon, 2 Sept 2024 at 14:38, Jacob Abrams <satur9nine@gmail.com> wrote:
> >>
> >> These changes allow the official STM32L4xx HAL UART driver to function
> >> properly with the b-l475e-iot01a machine.
> >>
> >> Modifying USART_CR1 TE bit should alter USART_ISR TEACK bit, and
> >> likewise for RE and REACK bit.
> >>
> >> USART registers may be accessed via 16-bit instructions.
> >>
> >> Reseting USART_CR1 UE bit should restore ISR to default value.
> >>
> >> Fixes: 87b77e6e01ca ("hw/char/stm32l4x5_usart: Enable serial read and write")
> >> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2540
> >> Signed-off-by: Jacob Abrams <satur9nine@gmail.com>
> >
> > Thanks for this patch. I have one question below, and one
> > minor nit.
> >
> >> ---
> >>   hw/char/stm32l4x5_usart.c          | 29 +++++++++++++++++++---
> >>   tests/qtest/stm32l4x5_usart-test.c | 39 +++++++++++++++++++++++++++++-
> >>   2 files changed, 64 insertions(+), 4 deletions(-)
>
>
> >>   static void stm32l4x5_update_irq(Stm32l4x5UsartBaseState *s)
> >>   {
> >>       if (((s->isr & R_ISR_WUF_MASK) && (s->cr3 & R_CR3_WUFIE_MASK))        ||
> >> @@ -367,7 +389,7 @@ static void stm32l4x5_usart_base_reset_hold(Object *obj, ResetType type)
> >>       s->brr = 0x00000000;
> >>       s->gtpr = 0x00000000;
> >>       s->rtor = 0x00000000;
> >> -    s->isr = 0x020000C0;
> >> +    s->isr = ISR_RESET_VALUE;
> >>       s->rdr = 0x00000000;
> >>       s->tdr = 0x00000000;
> >>
> >> @@ -456,6 +478,7 @@ static void stm32l4x5_usart_base_write(void *opaque, hwaddr addr,
> >>       case A_CR1:
> >>           s->cr1 = value;
> >>           stm32l4x5_update_params(s);
> >> +        stm32l4x5_update_isr(s);
> >>           stm32l4x5_update_irq(s);
> >>           return;
> >>       case A_CR2:
> >> @@ -508,12 +531,12 @@ static const MemoryRegionOps stm32l4x5_usart_base_ops = {
> >>       .endianness = DEVICE_NATIVE_ENDIAN,
> >>       .valid = {
> >>           .max_access_size = 4,
> >> -        .min_access_size = 4,
> >> +        .min_access_size = 2,
> >>           .unaligned = false
> >>       },
> >>       .impl = {
> >>           .max_access_size = 4,
> >> -        .min_access_size = 4,
> >> +        .min_access_size = 2,
> >>           .unaligned = false
> >>       },
> >
> > The effect of these is that a 16-bit write not aligned
> > to a (4-aligned) register offset will generate a GUEST_ERROR
> > logged message, and a 16-bit write aligned to a 4-aligned
> > register offset will write the value zero-extended to 32 bits.
> > That seems reasonable to me.
>
> Peter, are you describing the .valid.min_access_size 4 -> 2 change
> or the .impl.min_access_size one?

I was intending to summarise the effects of making the code
changes above (both .impl and .valid), without any reference
to what the real hardware behaviour might or might not be
(as a starter for figuring out whether the change is reasonable).

> My understanding of the implementation is a 32-bit one:
>
>    REG32(CR1, 0x00)
>
>    struct Stm32l4x5UsartBaseState {
>        ...
>        uint32_t cr1;
>
>    static void stm32l4x5_usart_base_write(void *opaque, hwaddr addr,
>                                    uint64_t val64, unsigned int size)
>    {
>        ...
>        switch (addr) {
>        case A_CR1:
>            s->cr1 = value;
>
> Am I missing something?

If we make the .impl and .valid changes, then the result is
that we permit 16 bit writes to come through to the read
and write functions. Since we don't make any changes to
those functions to specially handle size == 2, you get the
effects of the existing code. If the 16 bit write is aligned
to a 4-aligned register offset it will match one of the A_*
cases, and will write 16-bit-value-zero-extended to it.
If the 16 bit write isn't to a 4-aligned offset it will fall
into the "default" case and be logged as a GUEST_ERROR.

Did I miss some aspect of what the behaviour change is?

-- PMM
Jacob Abrams Sept. 11, 2024, 6:26 a.m. UTC | #7
On 9/10/24 02:34, Peter Maydell wrote:
> On Mon, 9 Sept 2024 at 18:40, Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
>>
>> Hi,
>>
>> (Cc'ing Arnaud & Inès who are listed as maintainers)
>>
>> On 6/9/24 18:12, Peter Maydell wrote:
>>> On Mon, 2 Sept 2024 at 14:38, Jacob Abrams <satur9nine@gmail.com> wrote:
>>>>
>>>> These changes allow the official STM32L4xx HAL UART driver to function
>>>> properly with the b-l475e-iot01a machine.
>>>>
>>>> Modifying USART_CR1 TE bit should alter USART_ISR TEACK bit, and
>>>> likewise for RE and REACK bit.
>>>>
>>>> USART registers may be accessed via 16-bit instructions.
>>>>
>>>> Reseting USART_CR1 UE bit should restore ISR to default value.
>>>>
>>>> Fixes: 87b77e6e01ca ("hw/char/stm32l4x5_usart: Enable serial read and write")
>>>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2540
>>>> Signed-off-by: Jacob Abrams <satur9nine@gmail.com>
>>>
>>> Thanks for this patch. I have one question below, and one
>>> minor nit.
>>>
>>>> ---
>>>>   hw/char/stm32l4x5_usart.c          | 29 +++++++++++++++++++---
>>>>   tests/qtest/stm32l4x5_usart-test.c | 39 +++++++++++++++++++++++++++++-
>>>>   2 files changed, 64 insertions(+), 4 deletions(-)
>>
>>
>>>>   static void stm32l4x5_update_irq(Stm32l4x5UsartBaseState *s)
>>>>   {
>>>>       if (((s->isr & R_ISR_WUF_MASK) && (s->cr3 & R_CR3_WUFIE_MASK))        ||
>>>> @@ -367,7 +389,7 @@ static void stm32l4x5_usart_base_reset_hold(Object *obj, ResetType type)
>>>>       s->brr = 0x00000000;
>>>>       s->gtpr = 0x00000000;
>>>>       s->rtor = 0x00000000;
>>>> -    s->isr = 0x020000C0;
>>>> +    s->isr = ISR_RESET_VALUE;
>>>>       s->rdr = 0x00000000;
>>>>       s->tdr = 0x00000000;
>>>>
>>>> @@ -456,6 +478,7 @@ static void stm32l4x5_usart_base_write(void *opaque, hwaddr addr,
>>>>       case A_CR1:
>>>>           s->cr1 = value;
>>>>           stm32l4x5_update_params(s);
>>>> +        stm32l4x5_update_isr(s);
>>>>           stm32l4x5_update_irq(s);
>>>>           return;
>>>>       case A_CR2:
>>>> @@ -508,12 +531,12 @@ static const MemoryRegionOps stm32l4x5_usart_base_ops = {
>>>>       .endianness = DEVICE_NATIVE_ENDIAN,
>>>>       .valid = {
>>>>           .max_access_size = 4,
>>>> -        .min_access_size = 4,
>>>> +        .min_access_size = 2,
>>>>           .unaligned = false
>>>>       },
>>>>       .impl = {
>>>>           .max_access_size = 4,
>>>> -        .min_access_size = 4,
>>>> +        .min_access_size = 2,
>>>>           .unaligned = false
>>>>       },
>>>
>>> The effect of these is that a 16-bit write not aligned
>>> to a (4-aligned) register offset will generate a GUEST_ERROR
>>> logged message, and a 16-bit write aligned to a 4-aligned
>>> register offset will write the value zero-extended to 32 bits.
>>> That seems reasonable to me.
>>
>> Peter, are you describing the .valid.min_access_size 4 -> 2 change
>> or the .impl.min_access_size one?
> 
> I was intending to summarise the effects of making the code
> changes above (both .impl and .valid), without any reference
> to what the real hardware behaviour might or might not be
> (as a starter for figuring out whether the change is reasonable).
> 
>> My understanding of the implementation is a 32-bit one:
>>
>>    REG32(CR1, 0x00)
>>
>>    struct Stm32l4x5UsartBaseState {
>>        ...
>>        uint32_t cr1;
>>
>>    static void stm32l4x5_usart_base_write(void *opaque, hwaddr addr,
>>                                    uint64_t val64, unsigned int size)
>>    {
>>        ...
>>        switch (addr) {
>>        case A_CR1:
>>            s->cr1 = value;
>>
>> Am I missing something?
> 
> If we make the .impl and .valid changes, then the result is
> that we permit 16 bit writes to come through to the read
> and write functions. Since we don't make any changes to
> those functions to specially handle size == 2, you get the
> effects of the existing code. If the 16 bit write is aligned
> to a 4-aligned register offset it will match one of the A_*
> cases, and will write 16-bit-value-zero-extended to it.
> If the 16 bit write isn't to a 4-aligned offset it will fall
> into the "default" case and be logged as a GUEST_ERROR.
> 
> Did I miss some aspect of what the behaviour change is?
> -- PMM

Ah I see your point now regarding the zero extension writes. Basically you are saying this patch will have the effect that a 16-bit write to one of the USART registers such as USART_RTOR via an ARM instruction like STRH would not be handled correctly because value to be written will be zero extended to 32-bits before being written.

So for example if USART_RTOR currently has the value 0xAAAABBBB and STRH of value 0xCCCC is being applied to the lower 16 bits then one would expect the value to be 0xAAAACCCC in real HW but after my code change in Qemu then USART_RTOR would end up with 0x0000CCCC because the size parameter of stm32l4x5_usart_base_write is not being taken into account.

I do believe this is a valid problem. I will test the behavior on my STM32L476 and report back the real HW behavior and I suspect the result would be STRH write to such registers is allowed and unwritten bits are preserved and not overwritten with zeros.

I submitted a new patch for just TEACK fix already and will report back on the real HW test result later and adjust 16-bit read/write code as needed to behave properly.

I have also submitted a question to my STM contact regarding the reference manual and assuming the answer is yes it is in error I will add comments explaining that to my next patch for the 16-bit read/write handling.

-- Jacob
Peter Maydell Sept. 11, 2024, 10:28 a.m. UTC | #8
On Wed, 11 Sept 2024 at 07:27, Jacob Abrams <satur9nine@gmail.com> wrote:
> On 9/10/24 02:34, Peter Maydell wrote:
> > If we make the .impl and .valid changes, then the result is
> > that we permit 16 bit writes to come through to the read
> > and write functions. Since we don't make any changes to
> > those functions to specially handle size == 2, you get the
> > effects of the existing code. If the 16 bit write is aligned
> > to a 4-aligned register offset it will match one of the A_*
> > cases, and will write 16-bit-value-zero-extended to it.
> > If the 16 bit write isn't to a 4-aligned offset it will fall
> > into the "default" case and be logged as a GUEST_ERROR.
> >
> > Did I miss some aspect of what the behaviour change is?

> Ah I see your point now regarding the zero extension writes. Basically you are saying this patch will have the effect that a 16-bit write to one of the USART registers such as USART_RTOR via an ARM instruction like STRH would not be handled correctly because value to be written will be zero extended to 32-bits before being written.

> I do believe this is a valid problem. I will test the behavior on my STM32L476 and report back the real HW behavior and I suspect the result would be STRH write to such registers is allowed and unwritten bits are preserved and not overwritten with zeros.

Yeah, the datasheet doesn't say here and so checking the h/w
behaviour would be helpful.

> I submitted a new patch for just TEACK fix already and will report back on the real HW test result later and adjust 16-bit read/write code as needed to behave properly.
>
> I have also submitted a question to my STM contact regarding the reference manual and assuming the answer is yes it is in error I will add comments explaining that to my next patch for the 16-bit read/write handling.

Would certainly be interesting to see what STM say.
(If the answer is that some of these cases are invalid
guest behaviour then we have the option of rather than
following the in-practice h/w behaviour exactly instead
doing something easy and logging it as a guest-error.)

thanks
-- PMM
Jacob Abrams Sept. 14, 2024, 3:42 a.m. UTC | #9
On 9/11/24 03:28, Peter Maydell wrote:
> On Wed, 11 Sept 2024 at 07:27, Jacob Abrams <satur9nine@gmail.com> wrote:
>> On 9/10/24 02:34, Peter Maydell wrote:
>>> If we make the .impl and .valid changes, then the result is
>>> that we permit 16 bit writes to come through to the read
>>> and write functions. Since we don't make any changes to
>>> those functions to specially handle size == 2, you get the
>>> effects of the existing code. If the 16 bit write is aligned
>>> to a 4-aligned register offset it will match one of the A_*
>>> cases, and will write 16-bit-value-zero-extended to it.
>>> If the 16 bit write isn't to a 4-aligned offset it will fall
>>> into the "default" case and be logged as a GUEST_ERROR.
>>>
>>> Did I miss some aspect of what the behaviour change is?
> 
>> Ah I see your point now regarding the zero extension writes. Basically you are saying this patch will have the effect that a 16-bit write to one of the USART registers such as USART_RTOR via an ARM instruction like STRH would not be handled correctly because value to be written will be zero extended to 32-bits before being written.
> 
>> I do believe this is a valid problem. I will test the behavior on my STM32L476 and report back the real HW behavior and I suspect the result would be STRH write to such registers is allowed and unwritten bits are preserved and not overwritten with zeros.
> 
> Yeah, the datasheet doesn't say here and so checking the h/w
> behaviour would be helpful.
> 
>> I submitted a new patch for just TEACK fix already and will report back on the real HW test result later and adjust 16-bit read/write code as needed to behave properly.
>>
>> I have also submitted a question to my STM contact regarding the reference manual and assuming the answer is yes it is in error I will add comments explaining that to my next patch for the 16-bit read/write handling.
> 
> Would certainly be interesting to see what STM say.
> (If the answer is that some of these cases are invalid
> guest behaviour then we have the option of rather than
> following the in-practice h/w behaviour exactly instead
> doing something easy and logging it as a guest-error.)
> 
> thanks
> -- PMM

I have received a response from my contact Nicolas Fillon at STM, he wrote "I see we are making 16 bit access read and write to these 16 bit registers in our library for both HAL and LL so this should be a documentation issue."

I noticed in the Qemu source the RegisterAccessInfo struct and associated register_write_memory/register_write functions. These functions appear quite helpful to ensure that reserved bits are not written by using the ro field. I don't see very much usage of this paradigm in Qemu however, only Xilinx and USB DWC3 code appears to use it, but it seems a useful approach in many situations, especially for STM chips.

On the physical STM hardware, specifically the STM32L476, it allows writes smaller than 32-bit to 32-bit registers and does not fault or ignore them. In fact I noticed some very interesting byte duplication behavior, I tested the following code

  // USART_RTOR is a 32-bit register where every bit is r/w

  UartHandle.Instance->RTOR = 0xAAAABBBB;
  printf("USART1 RTOR 1: 0x%08"PRIx32"\n\r", UartHandle.Instance->RTOR);

  uint16_t *rtor_hw = (uint16_t *) &UartHandle.Instance->RTOR;
  rtor_hw[1] = (uint16_t) 0xCCCC;
  printf("USART1 RTOR 2: 0x%08"PRIx32"\n\r", UartHandle.Instance->RTOR);

  uint8_t *rtor_byte = (uint8_t *) &UartHandle.Instance->RTOR;
  rtor_byte[3] = (uint8_t) 0xDD;
  printf("USART1 RTOR 3: 0x%08"PRIx32"\n\r", UartHandle.Instance->RTOR);

  rtor_hw[0] = (uint16_t) 0xEEEE;
  printf("USART1 RTOR 4: 0x%08"PRIx32"\n\r", UartHandle.Instance->RTOR);

  // USART_BRR is a 32-bit register where only the lower 16-bit are r/w, the top 16-bits are reserved and read zero

  uint32_t *BRR32 = (uint32_t *) &USART2->BRR;

  *BRR32 = 0x11223344;
  printf("USART2 BRR 1: 0x%08"PRIx32"\n\r", *BRR32);

  uint16_t *brr_hw = (uint16_t *) BRR32;
  brr_hw[0] = (uint16_t) 0xCCCC;
  printf("USART2 BRR 2: 0x%08"PRIx32"\n\r", *BRR32);

  uint8_t *brr_byte = (uint8_t *) BRR32;
  brr_byte[1] = (uint8_t) 0xDD;
  printf("USART2 BRR 3: 0x%08"PRIx32"\n\r", *BRR32);

  /*
     OUTPUT:

     USART1 RTOR 1: 0xaaaabbbb
     USART1 RTOR 2: 0xcccccccc
     USART1 RTOR 3: 0xdddddddd
     USART1 RTOR 4: 0xeeeeeeee
     USART2 BRR 1: 0x00003344
     USART2 BRR 2: 0x0000cccc
     USART2 BRR 3: 0x0000dddd
   */

Notice how an 8-bit write of just 0xDD is duplicates to all words in the register, strange. I wonder if Qemu is interested in emulating this exact behavior, it doesn't seem particularly critical since the drivers provided by STM should never purposely write to fewer bits than are actually writable.

Jacob
Peter Maydell Oct. 8, 2024, 1:45 p.m. UTC | #10
On Sat, 14 Sept 2024 at 04:42, Jacob Abrams <satur9nine@gmail.com> wrote:
> I have received a response from my contact Nicolas Fillon at STM, he wrote "I see we are making 16 bit access read and write to these 16 bit registers in our library for both HAL and LL so this should be a documentation issue."

Thanks for the update.

> I noticed in the Qemu source the RegisterAccessInfo struct and associated register_write_memory/register_write functions. These functions appear quite helpful to ensure that reserved bits are not written by using the ro field. I don't see very much usage of this paradigm in Qemu however, only Xilinx and USB DWC3 code appears to use it, but it seems a useful approach in many situations, especially for STM chips.
>
> On the physical STM hardware, specifically the STM32L476, it allows writes smaller than 32-bit to 32-bit registers and does not fault or ignore them. In fact I noticed some very interesting byte duplication behavior, I tested the following code

> Notice how an 8-bit write of just 0xDD is duplicates to all words in the register, strange. I wonder if Qemu is interested in emulating this exact behavior, it doesn't seem particularly critical since the drivers provided by STM should never purposely write to fewer bits than are actually writable.

No, I don't think we care about emulating that exact behaviour.
We could if you like do a LOG_GUEST_ERROR for a write that's
smaller than the number of implemented bits, since it seems
highly unlikely that a guest really intended that byte
duplication, but I'm not sure I would personally go to that
effort.

-- PMM
diff mbox series

Patch

diff --git a/hw/char/stm32l4x5_usart.c b/hw/char/stm32l4x5_usart.c
index fc5dcac0c4..859fc6236a 100644
--- a/hw/char/stm32l4x5_usart.c
+++ b/hw/char/stm32l4x5_usart.c
@@ -154,6 +154,28 @@  REG32(RDR, 0x24)
 REG32(TDR, 0x28)
     FIELD(TDR, TDR, 0, 9)
 
+#define ISR_RESET_VALUE (0x020000C0)
+
+static void stm32l4x5_update_isr(Stm32l4x5UsartBaseState *s)
+{
+    if (!(s->cr1 & R_CR1_UE_MASK)) {
+        s->isr = ISR_RESET_VALUE;
+        return;
+    }
+
+    if (s->cr1 & R_CR1_TE_MASK) {
+        s->isr |= R_ISR_TEACK_MASK;
+    } else {
+        s->isr &= ~R_ISR_TEACK_MASK;
+    }
+
+    if (s->cr1 & R_CR1_RE_MASK) {
+        s->isr |= R_ISR_REACK_MASK;
+    } else {
+        s->isr &= ~R_ISR_REACK_MASK;
+    }
+}
+
 static void stm32l4x5_update_irq(Stm32l4x5UsartBaseState *s)
 {
     if (((s->isr & R_ISR_WUF_MASK) && (s->cr3 & R_CR3_WUFIE_MASK))        ||
@@ -367,7 +389,7 @@  static void stm32l4x5_usart_base_reset_hold(Object *obj, ResetType type)
     s->brr = 0x00000000;
     s->gtpr = 0x00000000;
     s->rtor = 0x00000000;
-    s->isr = 0x020000C0;
+    s->isr = ISR_RESET_VALUE;
     s->rdr = 0x00000000;
     s->tdr = 0x00000000;
 
@@ -456,6 +478,7 @@  static void stm32l4x5_usart_base_write(void *opaque, hwaddr addr,
     case A_CR1:
         s->cr1 = value;
         stm32l4x5_update_params(s);
+        stm32l4x5_update_isr(s);
         stm32l4x5_update_irq(s);
         return;
     case A_CR2:
@@ -508,12 +531,12 @@  static const MemoryRegionOps stm32l4x5_usart_base_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
     .valid = {
         .max_access_size = 4,
-        .min_access_size = 4,
+        .min_access_size = 2,
         .unaligned = false
     },
     .impl = {
         .max_access_size = 4,
-        .min_access_size = 4,
+        .min_access_size = 2,
         .unaligned = false
     },
 };
diff --git a/tests/qtest/stm32l4x5_usart-test.c b/tests/qtest/stm32l4x5_usart-test.c
index 8902518233..ef886074c0 100644
--- a/tests/qtest/stm32l4x5_usart-test.c
+++ b/tests/qtest/stm32l4x5_usart-test.c
@@ -36,6 +36,8 @@  REG32(GTPR, 0x10)
 REG32(RTOR, 0x14)
 REG32(RQR, 0x18)
 REG32(ISR, 0x1C)
+    FIELD(ISR, REACK, 22, 1)
+    FIELD(ISR, TEACK, 21, 1)
     FIELD(ISR, TXE, 7, 1)
     FIELD(ISR, RXNE, 5, 1)
     FIELD(ISR, ORE, 3, 1)
@@ -191,7 +193,7 @@  static void init_uart(QTestState *qts)
 
     /* Enable the transmitter, the receiver and the USART. */
     qtest_writel(qts, (USART1_BASE_ADDR + A_CR1),
-        R_CR1_UE_MASK | R_CR1_RE_MASK | R_CR1_TE_MASK);
+        cr1 | R_CR1_UE_MASK | R_CR1_RE_MASK | R_CR1_TE_MASK);
 }
 
 static void test_write_read(void)
@@ -202,6 +204,11 @@  static void test_write_read(void)
     qtest_writel(qts, USART1_BASE_ADDR + A_TDR, 0xFFFFFFFF);
     const uint32_t tdr = qtest_readl(qts, USART1_BASE_ADDR + A_TDR);
     g_assert_cmpuint(tdr, ==, 0x000001FF);
+
+    /* Official STM HAL uses uint16_t for TDR */
+    qtest_writew(qts, USART1_BASE_ADDR + A_TDR, 0xFFFF);
+    const uint16_t tdr16 = qtest_readw(qts, USART1_BASE_ADDR + A_TDR);
+    g_assert_cmpuint(tdr16, ==, 0x000001FF);
 }
 
 static void test_receive_char(void)
@@ -296,6 +303,35 @@  static void test_send_str(void)
     qtest_quit(qts);
 }
 
+static void test_ack(void)
+{
+    uint32_t cr1;
+    uint32_t isr;
+    QTestState *qts = qtest_init("-M b-l475e-iot01a");
+
+    init_uart(qts);
+
+    cr1 = qtest_readl(qts, (USART1_BASE_ADDR + A_CR1));
+
+    /* Disable the transmitter and receiver. */
+    qtest_writel(qts, (USART1_BASE_ADDR + A_CR1),
+        cr1 & ~(R_CR1_RE_MASK | R_CR1_TE_MASK));
+
+    /* Test ISR ACK for transmitter and receiver disabled */
+    isr = qtest_readl(qts, (USART1_BASE_ADDR + A_ISR));
+    g_assert_false(isr & R_ISR_TEACK_MASK);
+    g_assert_false(isr & R_ISR_REACK_MASK);
+
+    /* Enable the transmitter and receiver. */
+    qtest_writel(qts, (USART1_BASE_ADDR + A_CR1),
+        cr1 | (R_CR1_RE_MASK | R_CR1_TE_MASK));
+
+    /* Test ISR ACK for transmitter and receiver disabled */
+    isr = qtest_readl(qts, (USART1_BASE_ADDR + A_ISR));
+    g_assert_true(isr & R_ISR_TEACK_MASK);
+    g_assert_true(isr & R_ISR_REACK_MASK);
+}
+
 int main(int argc, char **argv)
 {
     int ret;
@@ -308,6 +344,7 @@  int main(int argc, char **argv)
     qtest_add_func("stm32l4x5/usart/send_char", test_send_char);
     qtest_add_func("stm32l4x5/usart/receive_str", test_receive_str);
     qtest_add_func("stm32l4x5/usart/send_str", test_send_str);
+    qtest_add_func("stm32l4x5/usart/ack", test_ack);
     ret = g_test_run();
 
     return ret;