diff mbox series

hw/usb/hcd-ohci: Fix #1510, #303: pid not IN or OUT

Message ID CACBuX0To1QWpOTE-HfbXv=tUVWVL0=pvn-+E28EL_mWuqfZ-sw@mail.gmail.com
State New
Headers show
Series hw/usb/hcd-ohci: Fix #1510, #303: pid not IN or OUT | expand

Commit Message

Cord Amfmgm Feb. 6, 2024, 7:13 a.m. UTC
This changes the ohci validation to not assert if invalid
data is fed to the ohci controller. The poc suggested in
https://bugs.launchpad.net/qemu/+bug/1907042
and then migrated to bug #303 does the following to
feed it a SETUP pid and EndPt of 1:

        uint32_t MaxPacket = 64;
        uint32_t TDFormat = 0;
        uint32_t Skip = 0;
        uint32_t Speed = 0;
        uint32_t Direction = 0;  /* #define OHCI_TD_DIR_SETUP 0 */
        uint32_t EndPt = 1;
        uint32_t FuncAddress = 0;
        ed->attr = (MaxPacket << 16) | (TDFormat << 15) | (Skip << 14)
                   | (Speed << 13) | (Direction << 11) | (EndPt << 7)
                   | FuncAddress;
        ed->tailp = /*TDQTailPntr= */ 0;
        ed->headp = ((/*TDQHeadPntr= */ &td[0]) & 0xfffffff0)
                   | (/* ToggleCarry= */ 0 << 1);
        ed->next_ed = (/* NextED= */ 0 & 0xfffffff0)

qemu-fuzz also caught the same issue in #1510. They are
both fixed by this patch.

The if (td.cbp > td.be) logic in ohci_service_td() causes an
ohci_die(). My understanding of the OHCI spec 4.3.1.2
Table 4-2 allows td.cbp to be one byte more than td.be to
signal the buffer has zero length. The new check in qemu
appears to have been added since qemu-4.2. This patch
includes both fixes since they are located very close
together.

Signed-off-by: David Hubbard <dmamfmgm@gmail.com>

 usb_ohci_port_detach(int index) "port #%d"
 usb_ohci_port_wakeup(int index) "port #%d"

Comments

Michael Tokarev April 18, 2024, 3:43 p.m. UTC | #1
06.02.2024 10:13, Cord Amfmgm wrote:
> This changes the ohci validation to not assert if invalid
> data is fed to the ohci controller. The poc suggested in
> https://bugs.launchpad.net/qemu/+bug/1907042
> and then migrated to bug #303 does the following to
> feed it a SETUP pid and EndPt of 1:
> 
>          uint32_t MaxPacket = 64;
>          uint32_t TDFormat = 0;
>          uint32_t Skip = 0;
>          uint32_t Speed = 0;
>          uint32_t Direction = 0;  /* #define OHCI_TD_DIR_SETUP 0 */
>          uint32_t EndPt = 1;
>          uint32_t FuncAddress = 0;
>          ed->attr = (MaxPacket << 16) | (TDFormat << 15) | (Skip << 14)
>                     | (Speed << 13) | (Direction << 11) | (EndPt << 7)
>                     | FuncAddress;
>          ed->tailp = /*TDQTailPntr= */ 0;
>          ed->headp = ((/*TDQHeadPntr= */ &td[0]) & 0xfffffff0)
>                     | (/* ToggleCarry= */ 0 << 1);
>          ed->next_ed = (/* NextED= */ 0 & 0xfffffff0)
> 
> qemu-fuzz also caught the same issue in #1510. They are
> both fixed by this patch.
> 
> The if (td.cbp > td.be) logic in ohci_service_td() causes an
> ohci_die(). My understanding of the OHCI spec 4.3.1.2
> Table 4-2 allows td.cbp to be one byte more than td.be to
> signal the buffer has zero length. The new check in qemu
> appears to have been added since qemu-4.2. This patch
> includes both fixes since they are located very close
> together.
> 
> Signed-off-by: David Hubbard <dmamfmgm@gmail.com>

Wonder if this got lost somehow.  Or is it not needed?

Thanks,

/mjt

> diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
> index d73b53f33c..a53808126f 100644
> --- a/hw/usb/hcd-ohci.c
> +++ b/hw/usb/hcd-ohci.c
> @@ -927,6 +927,11 @@ static int ohci_service_td(OHCIState *ohci,
> struct ohci_ed *ed)
>       case OHCI_TD_DIR_SETUP:
>           str = "setup";
>           pid = USB_TOKEN_SETUP;
> +        if (OHCI_BM(ed->flags, ED_EN) > 0) {  /* setup only allowed to ep 0 */
> +            trace_usb_ohci_td_bad_pid(str, ed->flags, td.flags);
> +            ohci_die(ohci);
> +            return 1;
> +        }
>           break;
>       default:
>           trace_usb_ohci_td_bad_direction(dir);
> @@ -936,8 +941,8 @@ static int ohci_service_td(OHCIState *ohci, struct
> ohci_ed *ed)
>           if ((td.cbp & 0xfffff000) != (td.be & 0xfffff000)) {
>               len = (td.be & 0xfff) + 0x1001 - (td.cbp & 0xfff);
>           } else {
> -            if (td.cbp > td.be) {
> -                trace_usb_ohci_iso_td_bad_cc_overrun(td.cbp, td.be);
> +            if (td.cbp > td.be + 1) {
> +                trace_usb_ohci_td_bad_buf(td.cbp, td.be);
>                   ohci_die(ohci);
>                   return 1;
>               }
> diff --git a/hw/usb/trace-events b/hw/usb/trace-events
> index ed7dc210d3..b47d082fa3 100644
> --- a/hw/usb/trace-events
> +++ b/hw/usb/trace-events
> @@ -28,6 +28,8 @@ usb_ohci_iso_td_data_overrun(int ret, ssize_t len)
> "DataOverrun %d > %zu"
>   usb_ohci_iso_td_data_underrun(int ret) "DataUnderrun %d"
>   usb_ohci_iso_td_nak(int ret) "got NAK/STALL %d"
>   usb_ohci_iso_td_bad_response(int ret) "Bad device response %d"
> +usb_ohci_td_bad_buf(uint32_t cbp, uint32_t be) "Bad cbp = 0x%x > be = 0x%x"
> +usb_ohci_td_bad_pid(const char *s, uint32_t edf, uint32_t tdf) "Bad
> pid %s: ed.flags 0x%x td.flags 0x%x"
>   usb_ohci_port_attach(int index) "port #%d"
>   usb_ohci_port_detach(int index) "port #%d"
>   usb_ohci_port_wakeup(int index) "port #%d"
>
Cord Amfmgm April 19, 2024, 3 p.m. UTC | #2
Hi Michael,

This just got lost somehow. It is still an issue (see
https://gitlab.com/qemu-project/qemu/-/issues/1510 ). I believe this change
fixes the issue.

On Thu, Apr 18, 2024 at 10:43 AM Michael Tokarev <mjt@tls.msk.ru> wrote:

> 06.02.2024 10:13, Cord Amfmgm wrote:
> > This changes the ohci validation to not assert if invalid
> > data is fed to the ohci controller. The poc suggested in
> > https://bugs.launchpad.net/qemu/+bug/1907042
> > and then migrated to bug #303 does the following to
> > feed it a SETUP pid and EndPt of 1:
> >
> >          uint32_t MaxPacket = 64;
> >          uint32_t TDFormat = 0;
> >          uint32_t Skip = 0;
> >          uint32_t Speed = 0;
> >          uint32_t Direction = 0;  /* #define OHCI_TD_DIR_SETUP 0 */
> >          uint32_t EndPt = 1;
> >          uint32_t FuncAddress = 0;
> >          ed->attr = (MaxPacket << 16) | (TDFormat << 15) | (Skip << 14)
> >                     | (Speed << 13) | (Direction << 11) | (EndPt << 7)
> >                     | FuncAddress;
> >          ed->tailp = /*TDQTailPntr= */ 0;
> >          ed->headp = ((/*TDQHeadPntr= */ &td[0]) & 0xfffffff0)
> >                     | (/* ToggleCarry= */ 0 << 1);
> >          ed->next_ed = (/* NextED= */ 0 & 0xfffffff0)
> >
> > qemu-fuzz also caught the same issue in #1510. They are
> > both fixed by this patch.
> >
> > The if (td.cbp > td.be) logic in ohci_service_td() causes an
> > ohci_die(). My understanding of the OHCI spec 4.3.1.2
> > Table 4-2 allows td.cbp to be one byte more than td.be to
> > signal the buffer has zero length. The new check in qemu
> > appears to have been added since qemu-4.2. This patch
> > includes both fixes since they are located very close
> > together.
> >
> > Signed-off-by: David Hubbard <dmamfmgm@gmail.com>
>
> Wonder if this got lost somehow.  Or is it not needed?
>
> Thanks,
>
> /mjt
>
> > diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
> > index d73b53f33c..a53808126f 100644
> > --- a/hw/usb/hcd-ohci.c
> > +++ b/hw/usb/hcd-ohci.c
> > @@ -927,6 +927,11 @@ static int ohci_service_td(OHCIState *ohci,
> > struct ohci_ed *ed)
> >       case OHCI_TD_DIR_SETUP:
> >           str = "setup";
> >           pid = USB_TOKEN_SETUP;
> > +        if (OHCI_BM(ed->flags, ED_EN) > 0) {  /* setup only allowed to
> ep 0 */
> > +            trace_usb_ohci_td_bad_pid(str, ed->flags, td.flags);
> > +            ohci_die(ohci);
> > +            return 1;
> > +        }
> >           break;
> >       default:
> >           trace_usb_ohci_td_bad_direction(dir);
> > @@ -936,8 +941,8 @@ static int ohci_service_td(OHCIState *ohci, struct
> > ohci_ed *ed)
> >           if ((td.cbp & 0xfffff000) != (td.be & 0xfffff000)) {
> >               len = (td.be & 0xfff) + 0x1001 - (td.cbp & 0xfff);
> >           } else {
> > -            if (td.cbp > td.be) {
> > -                trace_usb_ohci_iso_td_bad_cc_overrun(td.cbp, td.be);
> > +            if (td.cbp > td.be + 1) {
> > +                trace_usb_ohci_td_bad_buf(td.cbp, td.be);
> >                   ohci_die(ohci);
> >                   return 1;
> >               }
> > diff --git a/hw/usb/trace-events b/hw/usb/trace-events
> > index ed7dc210d3..b47d082fa3 100644
> > --- a/hw/usb/trace-events
> > +++ b/hw/usb/trace-events
> > @@ -28,6 +28,8 @@ usb_ohci_iso_td_data_overrun(int ret, ssize_t len)
> > "DataOverrun %d > %zu"
> >   usb_ohci_iso_td_data_underrun(int ret) "DataUnderrun %d"
> >   usb_ohci_iso_td_nak(int ret) "got NAK/STALL %d"
> >   usb_ohci_iso_td_bad_response(int ret) "Bad device response %d"
> > +usb_ohci_td_bad_buf(uint32_t cbp, uint32_t be) "Bad cbp = 0x%x > be =
> 0x%x"
> > +usb_ohci_td_bad_pid(const char *s, uint32_t edf, uint32_t tdf) "Bad
> > pid %s: ed.flags 0x%x td.flags 0x%x"
> >   usb_ohci_port_attach(int index) "port #%d"
> >   usb_ohci_port_detach(int index) "port #%d"
> >   usb_ohci_port_wakeup(int index) "port #%d"
> >
>
>
Cord Amfmgm April 24, 2024, 8:43 p.m. UTC | #3
On Thu, Apr 18, 2024 at 10:43 AM Michael Tokarev <mjt@tls.msk.ru> wrote:

> 06.02.2024 10:13, Cord Amfmgm wrote:
> > This changes the ohci validation to not assert if invalid
> > data is fed to the ohci controller. The poc suggested in
> > https://bugs.launchpad.net/qemu/+bug/1907042
> > and then migrated to bug #303 does the following to
> > feed it a SETUP pid and EndPt of 1:
> >
> >          uint32_t MaxPacket = 64;
> >          uint32_t TDFormat = 0;
> >          uint32_t Skip = 0;
> >          uint32_t Speed = 0;
> >          uint32_t Direction = 0;  /* #define OHCI_TD_DIR_SETUP 0 */
> >          uint32_t EndPt = 1;
> >          uint32_t FuncAddress = 0;
> >          ed->attr = (MaxPacket << 16) | (TDFormat << 15) | (Skip << 14)
> >                     | (Speed << 13) | (Direction << 11) | (EndPt << 7)
> >                     | FuncAddress;
> >          ed->tailp = /*TDQTailPntr= */ 0;
> >          ed->headp = ((/*TDQHeadPntr= */ &td[0]) & 0xfffffff0)
> >                     | (/* ToggleCarry= */ 0 << 1);
> >          ed->next_ed = (/* NextED= */ 0 & 0xfffffff0)
> >
> > qemu-fuzz also caught the same issue in #1510. They are
> > both fixed by this patch.
> >
> > The if (td.cbp > td.be) logic in ohci_service_td() causes an
> > ohci_die(). My understanding of the OHCI spec 4.3.1.2
> > Table 4-2 allows td.cbp to be one byte more than td.be to
> > signal the buffer has zero length. The new check in qemu
> > appears to have been added since qemu-4.2. This patch
> > includes both fixes since they are located very close
> > together.
> >
> > Signed-off-by: David Hubbard <dmamfmgm@gmail.com>
>
> Wonder if this got lost somehow.  Or is it not needed?
>
> Thanks,
>
> /mjt
>

Friendly ping! Gerd, can you chime in with how you would like to approach
this? I still need this patch to unblock my qemu workflow - custom OS
development.


>
> > diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
> > index d73b53f33c..a53808126f 100644
> > --- a/hw/usb/hcd-ohci.c
> > +++ b/hw/usb/hcd-ohci.c
> > @@ -927,6 +927,11 @@ static int ohci_service_td(OHCIState *ohci,
> > struct ohci_ed *ed)
> >       case OHCI_TD_DIR_SETUP:
> >           str = "setup";
> >           pid = USB_TOKEN_SETUP;
> > +        if (OHCI_BM(ed->flags, ED_EN) > 0) {  /* setup only allowed to
> ep 0 */
> > +            trace_usb_ohci_td_bad_pid(str, ed->flags, td.flags);
> > +            ohci_die(ohci);
> > +            return 1;
> > +        }
> >           break;
> >       default:
> >           trace_usb_ohci_td_bad_direction(dir);
> > @@ -936,8 +941,8 @@ static int ohci_service_td(OHCIState *ohci, struct
> > ohci_ed *ed)
> >           if ((td.cbp & 0xfffff000) != (td.be & 0xfffff000)) {
> >               len = (td.be & 0xfff) + 0x1001 - (td.cbp & 0xfff);
> >           } else {
> > -            if (td.cbp > td.be) {
> > -                trace_usb_ohci_iso_td_bad_cc_overrun(td.cbp, td.be);
> > +            if (td.cbp > td.be + 1) {
> > +                trace_usb_ohci_td_bad_buf(td.cbp, td.be);
> >                   ohci_die(ohci);
> >                   return 1;
> >               }
> > diff --git a/hw/usb/trace-events b/hw/usb/trace-events
> > index ed7dc210d3..b47d082fa3 100644
> > --- a/hw/usb/trace-events
> > +++ b/hw/usb/trace-events
> > @@ -28,6 +28,8 @@ usb_ohci_iso_td_data_overrun(int ret, ssize_t len)
> > "DataOverrun %d > %zu"
> >   usb_ohci_iso_td_data_underrun(int ret) "DataUnderrun %d"
> >   usb_ohci_iso_td_nak(int ret) "got NAK/STALL %d"
> >   usb_ohci_iso_td_bad_response(int ret) "Bad device response %d"
> > +usb_ohci_td_bad_buf(uint32_t cbp, uint32_t be) "Bad cbp = 0x%x > be =
> 0x%x"
> > +usb_ohci_td_bad_pid(const char *s, uint32_t edf, uint32_t tdf) "Bad
> > pid %s: ed.flags 0x%x td.flags 0x%x"
> >   usb_ohci_port_attach(int index) "port #%d"
> >   usb_ohci_port_detach(int index) "port #%d"
> >   usb_ohci_port_wakeup(int index) "port #%d"
> >
>
Cord Amfmgm May 7, 2024, 8:20 p.m. UTC | #4
On Wed, Apr 24, 2024 at 3:43 PM Cord Amfmgm <dmamfmgm@gmail.com> wrote:

> On Thu, Apr 18, 2024 at 10:43 AM Michael Tokarev <mjt@tls.msk.ru> wrote:
>
>> 06.02.2024 10:13, Cord Amfmgm wrote:
>> > This changes the ohci validation to not assert if invalid
>> > data is fed to the ohci controller. The poc suggested in
>> > https://bugs.launchpad.net/qemu/+bug/1907042
>> > and then migrated to bug #303 does the following to
>> > feed it a SETUP pid and EndPt of 1:
>> >
>> >          uint32_t MaxPacket = 64;
>> >          uint32_t TDFormat = 0;
>> >          uint32_t Skip = 0;
>> >          uint32_t Speed = 0;
>> >          uint32_t Direction = 0;  /* #define OHCI_TD_DIR_SETUP 0 */
>> >          uint32_t EndPt = 1;
>> >          uint32_t FuncAddress = 0;
>> >          ed->attr = (MaxPacket << 16) | (TDFormat << 15) | (Skip << 14)
>> >                     | (Speed << 13) | (Direction << 11) | (EndPt << 7)
>> >                     | FuncAddress;
>> >          ed->tailp = /*TDQTailPntr= */ 0;
>> >          ed->headp = ((/*TDQHeadPntr= */ &td[0]) & 0xfffffff0)
>> >                     | (/* ToggleCarry= */ 0 << 1);
>> >          ed->next_ed = (/* NextED= */ 0 & 0xfffffff0)
>> >
>> > qemu-fuzz also caught the same issue in #1510. They are
>> > both fixed by this patch.
>> >
>> > The if (td.cbp > td.be) logic in ohci_service_td() causes an
>> > ohci_die(). My understanding of the OHCI spec 4.3.1.2
>> > Table 4-2 allows td.cbp to be one byte more than td.be to
>> > signal the buffer has zero length. The new check in qemu
>> > appears to have been added since qemu-4.2. This patch
>> > includes both fixes since they are located very close
>> > together.
>> >
>> > Signed-off-by: David Hubbard <dmamfmgm@gmail.com>
>>
>> Wonder if this got lost somehow.  Or is it not needed?
>>
>> Thanks,
>>
>> /mjt
>>
>
> Friendly ping! Gerd, can you chime in with how you would like to approach
> this? I still need this patch to unblock my qemu workflow - custom OS
> development.
>
>

Can I please ask for an update on this? I'm attempting to figure out if
this patch has been rejected and I need to resubmit / rework it at HEAD?


>
>> > diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
>> > index d73b53f33c..a53808126f 100644
>> > --- a/hw/usb/hcd-ohci.c
>> > +++ b/hw/usb/hcd-ohci.c
>> > @@ -927,6 +927,11 @@ static int ohci_service_td(OHCIState *ohci,
>> > struct ohci_ed *ed)
>> >       case OHCI_TD_DIR_SETUP:
>> >           str = "setup";
>> >           pid = USB_TOKEN_SETUP;
>> > +        if (OHCI_BM(ed->flags, ED_EN) > 0) {  /* setup only allowed to
>> ep 0 */
>> > +            trace_usb_ohci_td_bad_pid(str, ed->flags, td.flags);
>> > +            ohci_die(ohci);
>> > +            return 1;
>> > +        }
>> >           break;
>> >       default:
>> >           trace_usb_ohci_td_bad_direction(dir);
>> > @@ -936,8 +941,8 @@ static int ohci_service_td(OHCIState *ohci, struct
>> > ohci_ed *ed)
>> >           if ((td.cbp & 0xfffff000) != (td.be & 0xfffff000)) {
>> >               len = (td.be & 0xfff) + 0x1001 - (td.cbp & 0xfff);
>> >           } else {
>> > -            if (td.cbp > td.be) {
>> > -                trace_usb_ohci_iso_td_bad_cc_overrun(td.cbp, td.be);
>> > +            if (td.cbp > td.be + 1) {
>> > +                trace_usb_ohci_td_bad_buf(td.cbp, td.be);
>> >                   ohci_die(ohci);
>> >                   return 1;
>> >               }
>> > diff --git a/hw/usb/trace-events b/hw/usb/trace-events
>> > index ed7dc210d3..b47d082fa3 100644
>> > --- a/hw/usb/trace-events
>> > +++ b/hw/usb/trace-events
>> > @@ -28,6 +28,8 @@ usb_ohci_iso_td_data_overrun(int ret, ssize_t len)
>> > "DataOverrun %d > %zu"
>> >   usb_ohci_iso_td_data_underrun(int ret) "DataUnderrun %d"
>> >   usb_ohci_iso_td_nak(int ret) "got NAK/STALL %d"
>> >   usb_ohci_iso_td_bad_response(int ret) "Bad device response %d"
>> > +usb_ohci_td_bad_buf(uint32_t cbp, uint32_t be) "Bad cbp = 0x%x > be =
>> 0x%x"
>> > +usb_ohci_td_bad_pid(const char *s, uint32_t edf, uint32_t tdf) "Bad
>> > pid %s: ed.flags 0x%x td.flags 0x%x"
>> >   usb_ohci_port_attach(int index) "port #%d"
>> >   usb_ohci_port_detach(int index) "port #%d"
>> >   usb_ohci_port_wakeup(int index) "port #%d"
>> >
>>
>
Thomas Huth May 8, 2024, 8:44 a.m. UTC | #5
On 07/05/2024 22.20, Cord Amfmgm wrote:
> 
> 
> On Wed, Apr 24, 2024 at 3:43 PM Cord Amfmgm <dmamfmgm@gmail.com 
> <mailto:dmamfmgm@gmail.com>> wrote:
> 
>     On Thu, Apr 18, 2024 at 10:43 AM Michael Tokarev <mjt@tls.msk.ru
>     <mailto:mjt@tls.msk.ru>> wrote:
> 
>         06.02.2024 10:13, Cord Amfmgm wrote:
>          > This changes the ohci validation to not assert if invalid
>          > data is fed to the ohci controller. The poc suggested in
>          > https://bugs.launchpad.net/qemu/+bug/1907042
>         <https://bugs.launchpad.net/qemu/+bug/1907042>
>          > and then migrated to bug #303 does the following to
>          > feed it a SETUP pid and EndPt of 1:
>          >
>          >          uint32_t MaxPacket = 64;
>          >          uint32_t TDFormat = 0;
>          >          uint32_t Skip = 0;
>          >          uint32_t Speed = 0;
>          >          uint32_t Direction = 0;  /* #define OHCI_TD_DIR_SETUP 0 */
>          >          uint32_t EndPt = 1;
>          >          uint32_t FuncAddress = 0;
>          >          ed->attr = (MaxPacket << 16) | (TDFormat << 15) | (Skip
>         << 14)
>          >                     | (Speed << 13) | (Direction << 11) | (EndPt
>         << 7)
>          >                     | FuncAddress;
>          >          ed->tailp = /*TDQTailPntr= */ 0;
>          >          ed->headp = ((/*TDQHeadPntr= */ &td[0]) & 0xfffffff0)
>          >                     | (/* ToggleCarry= */ 0 << 1);
>          >          ed->next_ed = (/* NextED= */ 0 & 0xfffffff0)
>          >
>          > qemu-fuzz also caught the same issue in #1510. They are
>          > both fixed by this patch.
>          >
>          > The if (td.cbp > td.be <http://td.be>) logic in ohci_service_td()
>         causes an
>          > ohci_die(). My understanding of the OHCI spec 4.3.1.2
>          > Table 4-2 allows td.cbp to be one byte more than td.be
>         <http://td.be> to
>          > signal the buffer has zero length. The new check in qemu
>          > appears to have been added since qemu-4.2. This patch
>          > includes both fixes since they are located very close
>          > together.
>          >
>          > Signed-off-by: David Hubbard <dmamfmgm@gmail.com
>         <mailto:dmamfmgm@gmail.com>>

Your Signed-off-by line does not match the From: line ... could you please 
fix this? (see 
https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line 
, too)

>         Wonder if this got lost somehow.  Or is it not needed?
> 
>         Thanks,
> 
>         /mjt
> 
> 
>     Friendly ping! Gerd, can you chime in with how you would like to
>     approach this? I still need this patch to unblock my qemu workflow -
>     custom OS development.
> 
> 
> Can I please ask for an update on this? I'm attempting to figure out if this 
> patch has been rejected and I need to resubmit / rework it at HEAD?

Looks like it's hard to find someone who still can review OHCI patches these 
days...

Anyway, I tried to get the reproducer running that had been added to the 
original patch (installed an Ubuntu 18.04 guest and compiled and ran that 
ohci_poc program in it), but so far, I failed. Could you please provide 
detailed steps how you can still produce this issue with the latest version 
of QEMU, please?

  Thanks,
   Thomas
Philippe Mathieu-Daudé May 8, 2024, 9:53 a.m. UTC | #6
On 7/5/24 22:20, Cord Amfmgm wrote:
> 
> 
> On Wed, Apr 24, 2024 at 3:43 PM Cord Amfmgm <dmamfmgm@gmail.com 
> <mailto:dmamfmgm@gmail.com>> wrote:
> 
>     On Thu, Apr 18, 2024 at 10:43 AM Michael Tokarev <mjt@tls.msk.ru
>     <mailto:mjt@tls.msk.ru>> wrote:
> 
>         06.02.2024 10:13, Cord Amfmgm wrote:
>          > This changes the ohci validation to not assert if invalid
>          > data is fed to the ohci controller. The poc suggested in
>          > https://bugs.launchpad.net/qemu/+bug/1907042
>         <https://bugs.launchpad.net/qemu/+bug/1907042>
>          > and then migrated to bug #303 does the following to
>          > feed it a SETUP pid and EndPt of 1:
>          >
>          >          uint32_t MaxPacket = 64;
>          >          uint32_t TDFormat = 0;
>          >          uint32_t Skip = 0;
>          >          uint32_t Speed = 0;
>          >          uint32_t Direction = 0;  /* #define
>         OHCI_TD_DIR_SETUP 0 */
>          >          uint32_t EndPt = 1;
>          >          uint32_t FuncAddress = 0;
>          >          ed->attr = (MaxPacket << 16) | (TDFormat << 15) |
>         (Skip << 14)
>          >                     | (Speed << 13) | (Direction << 11) |
>         (EndPt << 7)
>          >                     | FuncAddress;
>          >          ed->tailp = /*TDQTailPntr= */ 0;
>          >          ed->headp = ((/*TDQHeadPntr= */ &td[0]) & 0xfffffff0)
>          >                     | (/* ToggleCarry= */ 0 << 1);
>          >          ed->next_ed = (/* NextED= */ 0 & 0xfffffff0)
>          >
>          > qemu-fuzz also caught the same issue in #1510. They are
>          > both fixed by this patch.
>          >
>          > The if (td.cbp > td.be <http://td.be>) logic in
>         ohci_service_td() causes an
>          > ohci_die(). My understanding of the OHCI spec 4.3.1.2
>          > Table 4-2 allows td.cbp to be one byte more than td.be
>         <http://td.be> to
>          > signal the buffer has zero length. The new check in qemu
>          > appears to have been added since qemu-4.2. This patch
>          > includes both fixes since they are located very close
>          > together.
>          >
>          > Signed-off-by: David Hubbard <dmamfmgm@gmail.com
>         <mailto:dmamfmgm@gmail.com>>
> 
>         Wonder if this got lost somehow.  Or is it not needed?
> 
>         Thanks,
> 
>         /mjt
> 
> 
>     Friendly ping! Gerd, can you chime in with how you would like to
>     approach this? I still need this patch to unblock my qemu workflow -
>     custom OS development.
> 
> 
> Can I please ask for an update on this? I'm attempting to figure out if 
> this patch has been rejected and I need to resubmit / rework it at HEAD?
> 
> 
>          > diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
>          > index d73b53f33c..a53808126f 100644
>          > --- a/hw/usb/hcd-ohci.c
>          > +++ b/hw/usb/hcd-ohci.c
>          > @@ -927,6 +927,11 @@ static int ohci_service_td(OHCIState *ohci,
>          > struct ohci_ed *ed)
>          >       case OHCI_TD_DIR_SETUP:
>          >           str = "setup";
>          >           pid = USB_TOKEN_SETUP;
>          > +        if (OHCI_BM(ed->flags, ED_EN) > 0) {  /* setup only
>         allowed to ep 0 */
>          > +            trace_usb_ohci_td_bad_pid(str, ed->flags, td.flags);
>          > +            ohci_die(ohci);
>          > +            return 1;
>          > +        }
>          >           break;

I made a comment on April 18 but it is not showing on the list...
https://lore.kernel.org/qemu-devel/593072d7-614b-4197-9c9a-12bb70c31d31@linaro.org/

It was:

 > Please split in 2 different patches.

Even if closely related, it simplifies the workflow to have
single fix in single commit; for example if one is invalid,
we can revert it and not the other.

>          >       default:
>          >           trace_usb_ohci_td_bad_direction(dir);
>          > @@ -936,8 +941,8 @@ static int ohci_service_td(OHCIState
>         *ohci, struct
>          > ohci_ed *ed)
>          >           if ((td.cbp & 0xfffff000) != (td.be <http://td.be>
>         & 0xfffff000)) {
>          >               len = (td.be <http://td.be> & 0xfff) + 0x1001 -
>         (td.cbp & 0xfff);
>          >           } else {
>          > -            if (td.cbp > td.be <http://td.be>) {
>          > -                trace_usb_ohci_iso_td_bad_cc_overrun(td.cbp,
>         td.be <http://td.be>);
>          > +            if (td.cbp > td.be <http://td.be> + 1) {
>          > +                trace_usb_ohci_td_bad_buf(td.cbp, td.be
>         <http://td.be>);
>          >                   ohci_die(ohci);
>          >                   return 1;
>          >               }
>          > diff --git a/hw/usb/trace-events b/hw/usb/trace-events
>          > index ed7dc210d3..b47d082fa3 100644
>          > --- a/hw/usb/trace-events
>          > +++ b/hw/usb/trace-events
>          > @@ -28,6 +28,8 @@ usb_ohci_iso_td_data_overrun(int ret,
>         ssize_t len)
>          > "DataOverrun %d > %zu"
>          >   usb_ohci_iso_td_data_underrun(int ret) "DataUnderrun %d"
>          >   usb_ohci_iso_td_nak(int ret) "got NAK/STALL %d"
>          >   usb_ohci_iso_td_bad_response(int ret) "Bad device response %d"
>          > +usb_ohci_td_bad_buf(uint32_t cbp, uint32_t be) "Bad cbp =
>         0x%x > be = 0x%x"
>          > +usb_ohci_td_bad_pid(const char *s, uint32_t edf, uint32_t
>         tdf) "Bad
>          > pid %s: ed.flags 0x%x td.flags 0x%x"
>          >   usb_ohci_port_attach(int index) "port #%d"
>          >   usb_ohci_port_detach(int index) "port #%d"
>          >   usb_ohci_port_wakeup(int index) "port #%d"
>          >
>
Cord Amfmgm May 8, 2024, 3:28 p.m. UTC | #7
On Wed, May 8, 2024 at 4:53 AM Philippe Mathieu-Daudé <philmd@linaro.org>
wrote:

> On 7/5/24 22:20, Cord Amfmgm wrote:
> >
> >
> > On Wed, Apr 24, 2024 at 3:43 PM Cord Amfmgm <dmamfmgm@gmail.com
> > <mailto:dmamfmgm@gmail.com>> wrote:
> >
> >     On Thu, Apr 18, 2024 at 10:43 AM Michael Tokarev <mjt@tls.msk.ru
> >     <mailto:mjt@tls.msk.ru>> wrote:
> >
> >         06.02.2024 10:13, Cord Amfmgm wrote:
> >          > This changes the ohci validation to not assert if invalid
> >          > data is fed to the ohci controller. The poc suggested in
> >          > https://bugs.launchpad.net/qemu/+bug/1907042
> >         <https://bugs.launchpad.net/qemu/+bug/1907042>
> >          > and then migrated to bug #303 does the following to
> >          > feed it a SETUP pid and EndPt of 1:
> >          >
> >          >          uint32_t MaxPacket = 64;
> >          >          uint32_t TDFormat = 0;
> >          >          uint32_t Skip = 0;
> >          >          uint32_t Speed = 0;
> >          >          uint32_t Direction = 0;  /* #define
> >         OHCI_TD_DIR_SETUP 0 */
> >          >          uint32_t EndPt = 1;
> >          >          uint32_t FuncAddress = 0;
> >          >          ed->attr = (MaxPacket << 16) | (TDFormat << 15) |
> >         (Skip << 14)
> >          >                     | (Speed << 13) | (Direction << 11) |
> >         (EndPt << 7)
> >          >                     | FuncAddress;
> >          >          ed->tailp = /*TDQTailPntr= */ 0;
> >          >          ed->headp = ((/*TDQHeadPntr= */ &td[0]) & 0xfffffff0)
> >          >                     | (/* ToggleCarry= */ 0 << 1);
> >          >          ed->next_ed = (/* NextED= */ 0 & 0xfffffff0)
> >          >
> >          > qemu-fuzz also caught the same issue in #1510. They are
> >          > both fixed by this patch.
> >          >
> >          > The if (td.cbp > td.be <http://td.be>) logic in
> >         ohci_service_td() causes an
> >          > ohci_die(). My understanding of the OHCI spec 4.3.1.2
> >          > Table 4-2 allows td.cbp to be one byte more than td.be
> >         <http://td.be> to
> >          > signal the buffer has zero length. The new check in qemu
> >          > appears to have been added since qemu-4.2. This patch
> >          > includes both fixes since they are located very close
> >          > together.
> >          >
> >          > Signed-off-by: David Hubbard <dmamfmgm@gmail.com
> >         <mailto:dmamfmgm@gmail.com>>
> >
> >         Wonder if this got lost somehow.  Or is it not needed?
> >
> >         Thanks,
> >
> >         /mjt
> >
> >
> >     Friendly ping! Gerd, can you chime in with how you would like to
> >     approach this? I still need this patch to unblock my qemu workflow -
> >     custom OS development.
> >
> >
> > Can I please ask for an update on this? I'm attempting to figure out if
> > this patch has been rejected and I need to resubmit / rework it at HEAD?
> >
> >
> >          > diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
> >          > index d73b53f33c..a53808126f 100644
> >          > --- a/hw/usb/hcd-ohci.c
> >          > +++ b/hw/usb/hcd-ohci.c
> >          > @@ -927,6 +927,11 @@ static int ohci_service_td(OHCIState
> *ohci,
> >          > struct ohci_ed *ed)
> >          >       case OHCI_TD_DIR_SETUP:
> >          >           str = "setup";
> >          >           pid = USB_TOKEN_SETUP;
> >          > +        if (OHCI_BM(ed->flags, ED_EN) > 0) {  /* setup only
> >         allowed to ep 0 */
> >          > +            trace_usb_ohci_td_bad_pid(str, ed->flags,
> td.flags);
> >          > +            ohci_die(ohci);
> >          > +            return 1;
> >          > +        }
> >          >           break;
>
> I made a comment on April 18 but it is not showing on the list...
>
> https://lore.kernel.org/qemu-devel/593072d7-614b-4197-9c9a-12bb70c31d31@linaro.org/
>
> It was:
>
>  > Please split in 2 different patches.
>
> Even if closely related, it simplifies the workflow to have
> single fix in single commit; for example if one is invalid,
> we can revert it and not the other.
>

Sure, I can submit 2 separate patches. I'm unfamiliar with how to get those
to show up in this patch request, I assume it's not too bad if I submit
that as a separate patch request?

On Wed, May 8, 2024 at 3:45 AM Thomas Huth <thuth@redhat.com> wrote:

> Your Signed-off-by line does not match the From: line ... could you please
> fix this? (see
>
> https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
> , too)


I'll submit the new patch request with my pseudonym in the From: and
Signed-off-by: lines, per your request. Doesn't matter to me. However, this
arises simply because I don't give gmail my real name -
https://en.wikipedia.org/wiki/Nymwars


>
> >          >       default:
> >          >           trace_usb_ohci_td_bad_direction(dir);
> >          > @@ -936,8 +941,8 @@ static int ohci_service_td(OHCIState
> >         *ohci, struct
> >          > ohci_ed *ed)
> >          >           if ((td.cbp & 0xfffff000) != (td.be <http://td.be>
> >         & 0xfffff000)) {
> >          >               len = (td.be <http://td.be> & 0xfff) + 0x1001 -
> >         (td.cbp & 0xfff);
> >          >           } else {
> >          > -            if (td.cbp > td.be <http://td.be>) {
> >          > -                trace_usb_ohci_iso_td_bad_cc_overrun(td.cbp,
> >         td.be <http://td.be>);
> >          > +            if (td.cbp > td.be <http://td.be> + 1) {
> >          > +                trace_usb_ohci_td_bad_buf(td.cbp, td.be
> >         <http://td.be>);
> >          >                   ohci_die(ohci);
> >          >                   return 1;
> >          >               }
> >          > diff --git a/hw/usb/trace-events b/hw/usb/trace-events
> >          > index ed7dc210d3..b47d082fa3 100644
> >          > --- a/hw/usb/trace-events
> >          > +++ b/hw/usb/trace-events
> >          > @@ -28,6 +28,8 @@ usb_ohci_iso_td_data_overrun(int ret,
> >         ssize_t len)
> >          > "DataOverrun %d > %zu"
> >          >   usb_ohci_iso_td_data_underrun(int ret) "DataUnderrun %d"
> >          >   usb_ohci_iso_td_nak(int ret) "got NAK/STALL %d"
> >          >   usb_ohci_iso_td_bad_response(int ret) "Bad device response
> %d"
> >          > +usb_ohci_td_bad_buf(uint32_t cbp, uint32_t be) "Bad cbp =
> >         0x%x > be = 0x%x"
> >          > +usb_ohci_td_bad_pid(const char *s, uint32_t edf, uint32_t
> >         tdf) "Bad
> >          > pid %s: ed.flags 0x%x td.flags 0x%x"
> >          >   usb_ohci_port_attach(int index) "port #%d"
> >          >   usb_ohci_port_detach(int index) "port #%d"
> >          >   usb_ohci_port_wakeup(int index) "port #%d"
> >          >
> >
>
>
Cord Amfmgm May 9, 2024, 12:32 a.m. UTC | #8
On Wed, May 8, 2024 at 10:28 AM Cord Amfmgm <dmamfmgm@gmail.com> wrote:

>
>
> On Wed, May 8, 2024 at 4:53 AM Philippe Mathieu-Daudé <philmd@linaro.org>
> wrote:
>
>> On 7/5/24 22:20, Cord Amfmgm wrote:
>> >
>> >
>> > On Wed, Apr 24, 2024 at 3:43 PM Cord Amfmgm <dmamfmgm@gmail.com
>> > <mailto:dmamfmgm@gmail.com>> wrote:
>> >
>> >     On Thu, Apr 18, 2024 at 10:43 AM Michael Tokarev <mjt@tls.msk.ru
>> >     <mailto:mjt@tls.msk.ru>> wrote:
>> >
>> >         06.02.2024 10:13, Cord Amfmgm wrote:
>> >          > This changes the ohci validation to not assert if invalid
>> >          > data is fed to the ohci controller. The poc suggested in
>> >          > https://bugs.launchpad.net/qemu/+bug/1907042
>> >         <https://bugs.launchpad.net/qemu/+bug/1907042>
>> >          > and then migrated to bug #303 does the following to
>> >          > feed it a SETUP pid and EndPt of 1:
>> >          >
>> >          >          uint32_t MaxPacket = 64;
>> >          >          uint32_t TDFormat = 0;
>> >          >          uint32_t Skip = 0;
>> >          >          uint32_t Speed = 0;
>> >          >          uint32_t Direction = 0;  /* #define
>> >         OHCI_TD_DIR_SETUP 0 */
>> >          >          uint32_t EndPt = 1;
>> >          >          uint32_t FuncAddress = 0;
>> >          >          ed->attr = (MaxPacket << 16) | (TDFormat << 15) |
>> >         (Skip << 14)
>> >          >                     | (Speed << 13) | (Direction << 11) |
>> >         (EndPt << 7)
>> >          >                     | FuncAddress;
>> >          >          ed->tailp = /*TDQTailPntr= */ 0;
>> >          >          ed->headp = ((/*TDQHeadPntr= */ &td[0]) &
>> 0xfffffff0)
>> >          >                     | (/* ToggleCarry= */ 0 << 1);
>> >          >          ed->next_ed = (/* NextED= */ 0 & 0xfffffff0)
>> >          >
>> >          > qemu-fuzz also caught the same issue in #1510. They are
>> >          > both fixed by this patch.
>> >          >
>> >          > The if (td.cbp > td.be <http://td.be>) logic in
>> >         ohci_service_td() causes an
>> >          > ohci_die(). My understanding of the OHCI spec 4.3.1.2
>> >          > Table 4-2 allows td.cbp to be one byte more than td.be
>> >         <http://td.be> to
>> >          > signal the buffer has zero length. The new check in qemu
>> >          > appears to have been added since qemu-4.2. This patch
>> >          > includes both fixes since they are located very close
>> >          > together.
>> >          >
>> >          > Signed-off-by: David Hubbard <dmamfmgm@gmail.com
>> >         <mailto:dmamfmgm@gmail.com>>
>> >
>> >         Wonder if this got lost somehow.  Or is it not needed?
>> >
>> >         Thanks,
>> >
>> >         /mjt
>> >
>> >
>> >     Friendly ping! Gerd, can you chime in with how you would like to
>> >     approach this? I still need this patch to unblock my qemu workflow -
>> >     custom OS development.
>> >
>> >
>> > Can I please ask for an update on this? I'm attempting to figure out if
>> > this patch has been rejected and I need to resubmit / rework it at HEAD?
>> >
>> >
>> >          > diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
>> >          > index d73b53f33c..a53808126f 100644
>> >          > --- a/hw/usb/hcd-ohci.c
>> >          > +++ b/hw/usb/hcd-ohci.c
>> >          > @@ -927,6 +927,11 @@ static int ohci_service_td(OHCIState
>> *ohci,
>> >          > struct ohci_ed *ed)
>> >          >       case OHCI_TD_DIR_SETUP:
>> >          >           str = "setup";
>> >          >           pid = USB_TOKEN_SETUP;
>> >          > +        if (OHCI_BM(ed->flags, ED_EN) > 0) {  /* setup only
>> >         allowed to ep 0 */
>> >          > +            trace_usb_ohci_td_bad_pid(str, ed->flags,
>> td.flags);
>> >          > +            ohci_die(ohci);
>> >          > +            return 1;
>> >          > +        }
>> >          >           break;
>>
>> I made a comment on April 18 but it is not showing on the list...
>>
>> https://lore.kernel.org/qemu-devel/593072d7-614b-4197-9c9a-12bb70c31d31@linaro.org/
>>
>> It was:
>>
>>  > Please split in 2 different patches.
>>
>> Even if closely related, it simplifies the workflow to have
>> single fix in single commit; for example if one is invalid,
>> we can revert it and not the other.
>>
>
> Sure, I can submit 2 separate patches. I'm unfamiliar with how to get
> those to show up in this patch request, I assume it's not too bad if I
> submit that as a separate patch request?
>
> On Wed, May 8, 2024 at 3:45 AM Thomas Huth <thuth@redhat.com> wrote:
>
>> Your Signed-off-by line does not match the From: line ... could you please
>> fix this? (see
>>
>> https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
>> , too)
>
>
> I'll submit the new patch request with my pseudonym in the From: and
> Signed-off-by: lines, per your request. Doesn't matter to me. However, this
> arises simply because I don't give gmail my real name -
> https://en.wikipedia.org/wiki/Nymwars
>
>

I've sent the new patches just now. The repro disk images mentioned in the
patch descriptions are attached to this email.


>
>> >          >       default:
>> >          >           trace_usb_ohci_td_bad_direction(dir);
>> >          > @@ -936,8 +941,8 @@ static int ohci_service_td(OHCIState
>> >         *ohci, struct
>> >          > ohci_ed *ed)
>> >          >           if ((td.cbp & 0xfffff000) != (td.be <http://td.be>
>> >         & 0xfffff000)) {
>> >          >               len = (td.be <http://td.be> & 0xfff) + 0x1001
>> -
>> >         (td.cbp & 0xfff);
>> >          >           } else {
>> >          > -            if (td.cbp > td.be <http://td.be>) {
>> >          > -                trace_usb_ohci_iso_td_bad_cc_overrun(td.cbp,
>> >         td.be <http://td.be>);
>> >          > +            if (td.cbp > td.be <http://td.be> + 1) {
>> >          > +                trace_usb_ohci_td_bad_buf(td.cbp, td.be
>> >         <http://td.be>);
>> >          >                   ohci_die(ohci);
>> >          >                   return 1;
>> >          >               }
>> >          > diff --git a/hw/usb/trace-events b/hw/usb/trace-events
>> >          > index ed7dc210d3..b47d082fa3 100644
>> >          > --- a/hw/usb/trace-events
>> >          > +++ b/hw/usb/trace-events
>> >          > @@ -28,6 +28,8 @@ usb_ohci_iso_td_data_overrun(int ret,
>> >         ssize_t len)
>> >          > "DataOverrun %d > %zu"
>> >          >   usb_ohci_iso_td_data_underrun(int ret) "DataUnderrun %d"
>> >          >   usb_ohci_iso_td_nak(int ret) "got NAK/STALL %d"
>> >          >   usb_ohci_iso_td_bad_response(int ret) "Bad device response
>> %d"
>> >          > +usb_ohci_td_bad_buf(uint32_t cbp, uint32_t be) "Bad cbp =
>> >         0x%x > be = 0x%x"
>> >          > +usb_ohci_td_bad_pid(const char *s, uint32_t edf, uint32_t
>> >         tdf) "Bad
>> >          > pid %s: ed.flags 0x%x td.flags 0x%x"
>> >          >   usb_ohci_port_attach(int index) "port #%d"
>> >          >   usb_ohci_port_detach(int index) "port #%d"
>> >          >   usb_ohci_port_wakeup(int index) "port #%d"
>> >          >
>> >
>>
>>
Peter Maydell May 9, 2024, 5:48 p.m. UTC | #9
On Wed, 8 May 2024 at 16:29, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
> On Wed, May 8, 2024 at 3:45 AM Thomas Huth <thuth@redhat.com> wrote:
>>
>> Your Signed-off-by line does not match the From: line ... could you please
>> fix this? (see
>> https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
>> , too)
>
>
> I'll submit the new patch request with my pseudonym in the From: and Signed-off-by: lines, per your request. Doesn't matter to me. However, this arises simply because I don't give gmail my real name - https://en.wikipedia.org/wiki/Nymwars

I'm confused now. Of the two names you've used in this
patch (Cord Amfmgm and David Hubbard), are they both
pseudonyms, or is one a pseudonym and one your real name?

thanks
-- PMM
Cord Amfmgm May 9, 2024, 6:16 p.m. UTC | #10
On Thu, May 9, 2024 at 12:48 PM Peter Maydell <peter.maydell@linaro.org>
wrote:

> On Wed, 8 May 2024 at 16:29, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
> > On Wed, May 8, 2024 at 3:45 AM Thomas Huth <thuth@redhat.com> wrote:
> >>
> >> Your Signed-off-by line does not match the From: line ... could you
> please
> >> fix this? (see
> >>
> https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
> >> , too)
> >
> >
> > I'll submit the new patch request with my pseudonym in the From: and
> Signed-off-by: lines, per your request. Doesn't matter to me. However, this
> arises simply because I don't give gmail my real name -
> https://en.wikipedia.org/wiki/Nymwars
>
> I'm confused now. Of the two names you've used in this
> patch (Cord Amfmgm and David Hubbard), are they both
> pseudonyms, or is one a pseudonym and one your real name?
>
>
Hi Peter,

I am attempting to submit a small patch. For context, I'm getting broader
attention now because apparently OHCI is one of the less used components of
qemu and maybe the review process was taking a while. That's relevant
because I wasn't able to get prompt feedback and am now choosing what
appears to be the most expeditious approach -- all I want is to get this
patch done and be out of your hair. If Thomas Huth wants me to use a
consistent name, have I not complied? Are you asking out of curiosity or is
there a valid reason why I should answer your question in order to get the
patch submitted? Would you like to have a friendly chat over virtual coffee
sometime (but off-list)?

If you could please clarify I'm sure the answer is an easy one.

Thanks
BALATON Zoltan May 9, 2024, 8:37 p.m. UTC | #11
On Thu, 9 May 2024, Cord Amfmgm wrote:
> On Thu, May 9, 2024 at 12:48 PM Peter Maydell <peter.maydell@linaro.org>
> wrote:
>
>> On Wed, 8 May 2024 at 16:29, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
>>> On Wed, May 8, 2024 at 3:45 AM Thomas Huth <thuth@redhat.com> wrote:
>>>>
>>>> Your Signed-off-by line does not match the From: line ... could you
>> please
>>>> fix this? (see
>>>>
>> https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
>>>> , too)
>>>
>>>
>>> I'll submit the new patch request with my pseudonym in the From: and
>> Signed-off-by: lines, per your request. Doesn't matter to me. However, this
>> arises simply because I don't give gmail my real name -
>> https://en.wikipedia.org/wiki/Nymwars
>>
>> I'm confused now. Of the two names you've used in this
>> patch (Cord Amfmgm and David Hubbard), are they both
>> pseudonyms, or is one a pseudonym and one your real name?
>>
>>
> Hi Peter,
>
> I am attempting to submit a small patch. For context, I'm getting broader
> attention now because apparently OHCI is one of the less used components of
> qemu and maybe the review process was taking a while. That's relevant
> because I wasn't able to get prompt feedback and am now choosing what
> appears to be the most expeditious approach -- all I want is to get this
> patch done and be out of your hair. If Thomas Huth wants me to use a
> consistent name, have I not complied? Are you asking out of curiosity or is
> there a valid reason why I should answer your question in order to get the
> patch submitted? Would you like to have a friendly chat over virtual coffee
> sometime (but off-list)?

See here:
https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
and also the document linked from there:
http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/SubmittingPatches?id=f6f94e2ab1b33f0082ac22d71f66385a60d8157f#n297

As for getting the patch reviewed, it may be difficult as the USB 
maintainer is practically absent and has no time for QEMU for a while and 
as OHCI as you said is not odten used there aren't many people who could 
review it. Getting at least the formal stuff out of the way may help 
though to get somebody to try to review the patch.

Regards,
BALATON Zoltan
Cord Amfmgm May 10, 2024, 7:08 a.m. UTC | #12
On Thu, May 9, 2024 at 3:37 PM BALATON Zoltan <balaton@eik.bme.hu> wrote:

> On Thu, 9 May 2024, Cord Amfmgm wrote:
> > On Thu, May 9, 2024 at 12:48 PM Peter Maydell <peter.maydell@linaro.org>
> > wrote:
> >
> >> On Wed, 8 May 2024 at 16:29, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
> >>> On Wed, May 8, 2024 at 3:45 AM Thomas Huth <thuth@redhat.com> wrote:
> >>>>
> >>>> Your Signed-off-by line does not match the From: line ... could you
> >> please
> >>>> fix this? (see
> >>>>
> >>
> https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
> >>>> , too)
> >>>
> >>>
> >>> I'll submit the new patch request with my pseudonym in the From: and
> >> Signed-off-by: lines, per your request. Doesn't matter to me. However,
> this
> >> arises simply because I don't give gmail my real name -
> >> https://en.wikipedia.org/wiki/Nymwars
> >>
> >> I'm confused now. Of the two names you've used in this
> >> patch (Cord Amfmgm and David Hubbard), are they both
> >> pseudonyms, or is one a pseudonym and one your real name?
> >>
> >>
> > Hi Peter,
> >
> > I am attempting to submit a small patch. For context, I'm getting broader
> > attention now because apparently OHCI is one of the less used components
> of
> > qemu and maybe the review process was taking a while. That's relevant
> > because I wasn't able to get prompt feedback and am now choosing what
> > appears to be the most expeditious approach -- all I want is to get this
> > patch done and be out of your hair. If Thomas Huth wants me to use a
> > consistent name, have I not complied? Are you asking out of curiosity or
> is
> > there a valid reason why I should answer your question in order to get
> the
> > patch submitted? Would you like to have a friendly chat over virtual
> coffee
> > sometime (but off-list)?
>
> See here:
>
> https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
> and also the document linked from there:
>
> http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/SubmittingPatches?id=f6f94e2ab1b33f0082ac22d71f66385a60d8157f#n297


Yeah the policy makes sense. So it sounds like we're all good for that.


>
>
> As for getting the patch reviewed, it may be difficult as the USB
> maintainer is practically absent and has no time for QEMU for a while and
> as OHCI as you said is not odten used there aren't many people who could
> review it. Getting at least the formal stuff out of the way may help
> though to get somebody to try to review the patch.
>
> Regards,
> BALATON Zoltan


I understand. Well, that's unfortunate that the patch is going back on the
backlog. I'll leave it alone then?

There's always the option if anyone has an old enough system that the EHCI
on it has an actual OHCI companion controller, then they can use actual
hardware to validate the behavior. Barring some message saying the patch
has been approved or that someone wants me to rework the patch, I'll leave
this as abandoned.
Peter Maydell May 11, 2024, 10:25 a.m. UTC | #13
On Thu, 9 May 2024 at 19:17, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
>
>
>
> On Thu, May 9, 2024 at 12:48 PM Peter Maydell <peter.maydell@linaro.org> wrote:
>>
>> On Wed, 8 May 2024 at 16:29, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
>> > On Wed, May 8, 2024 at 3:45 AM Thomas Huth <thuth@redhat.com> wrote:
>> >>
>> >> Your Signed-off-by line does not match the From: line ... could you please
>> >> fix this? (see
>> >> https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
>> >> , too)
>> >
>> >
>> > I'll submit the new patch request with my pseudonym in the From: and Signed-off-by: lines, per your request. Doesn't matter to me. However, this arises simply because I don't give gmail my real name - https://en.wikipedia.org/wiki/Nymwars
>>
>> I'm confused now. Of the two names you've used in this
>> patch (Cord Amfmgm and David Hubbard), are they both
>> pseudonyms, or is one a pseudonym and one your real name?
>>
>
> Hi Peter,
>
> I am attempting to submit a small patch. For context, I'm getting broader attention now because apparently OHCI is one of the less used components of qemu and maybe the review process was taking a while. That's relevant because I wasn't able to get prompt feedback and am now choosing what appears to be the most expeditious approach -- all I want is to get this patch done and be out of your hair. If Thomas Huth wants me to use a consistent name, have I not complied? Are you asking out of curiosity or is there a valid reason why I should answer your question in order to get the patch submitted? Would you like to have a friendly chat over virtual coffee sometime (but off-list)?
>
> If you could please clarify I'm sure the answer is an easy one.

I'm asking because our basic expected position is "commits
are from the submitter's actual name, not a pseudonym". Obviously
we can't tell if people use a consistent plausible looking
pseudonym whether that corresponds to their real-world name
or not, but if you have a real name you're happy to attach
to this patch and are merely using a pseudonym for Google
email, then the resubmit of this patch didn't seem to me
to do that. i.e. I was expecting the change to be "make the
patch From: match the Signed-off-by line", not "make the
Signed-off-by line match the patch From:". (For avoidance
of doubt, we don't care about the email From: line, which
is distinct from the commit message From: i.e. author.)
So I was essentially asking "did you mean to do this, or did
you misunderstand what we were asking for?".

On the question of the actual patch, I'll try to get to it
if Gerd doesn't first (though I have a conference next week
so it might be the week after). The main thing I need to chase
down is whether it's OK to call usb_packet_addbuf() with a
zero length or not.

thanks
-- PMM
Cord Amfmgm May 12, 2024, 4:24 p.m. UTC | #14
On Sat, May 11, 2024 at 5:25 AM Peter Maydell <peter.maydell@linaro.org>
wrote:

> On Thu, 9 May 2024 at 19:17, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
> >
> >
> >
> > On Thu, May 9, 2024 at 12:48 PM Peter Maydell <peter.maydell@linaro.org>
> wrote:
> >>
> >> On Wed, 8 May 2024 at 16:29, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
> >> > On Wed, May 8, 2024 at 3:45 AM Thomas Huth <thuth@redhat.com> wrote:
> >> >>
> >> >> Your Signed-off-by line does not match the From: line ... could you
> please
> >> >> fix this? (see
> >> >>
> https://www.qemu.org/docs/master/devel/submitting-a-patch.html#patch-emails-must-include-a-signed-off-by-line
> >> >> , too)
> >> >
> >> >
> >> > I'll submit the new patch request with my pseudonym in the From: and
> Signed-off-by: lines, per your request. Doesn't matter to me. However, this
> arises simply because I don't give gmail my real name -
> https://en.wikipedia.org/wiki/Nymwars
> >>
> >> I'm confused now. Of the two names you've used in this
> >> patch (Cord Amfmgm and David Hubbard), are they both
> >> pseudonyms, or is one a pseudonym and one your real name?
> >>
> >
> > Hi Peter,
> >
> > I am attempting to submit a small patch. For context, I'm getting
> broader attention now because apparently OHCI is one of the less used
> components of qemu and maybe the review process was taking a while. That's
> relevant because I wasn't able to get prompt feedback and am now choosing
> what appears to be the most expeditious approach -- all I want is to get
> this patch done and be out of your hair. If Thomas Huth wants me to use a
> consistent name, have I not complied? Are you asking out of curiosity or is
> there a valid reason why I should answer your question in order to get the
> patch submitted? Would you like to have a friendly chat over virtual coffee
> sometime (but off-list)?
> >
> > If you could please clarify I'm sure the answer is an easy one.
>
> I'm asking because our basic expected position is "commits
> are from the submitter's actual name, not a pseudonym". Obviously
> we can't tell if people use a consistent plausible looking
> pseudonym whether that corresponds to their real-world name
> or not, but if you have a real name you're happy to attach
> to this patch and are merely using a pseudonym for Google
> email, then the resubmit of this patch didn't seem to me
> to do that. i.e. I was expecting the change to be "make the
> patch From: match the Signed-off-by line", not "make the
> Signed-off-by line match the patch From:". (For avoidance
> of doubt, we don't care about the email From: line, which
> is distinct from the commit message From: i.e. author.)
> So I was essentially asking "did you mean to do this, or did
> you misunderstand what we were asking for?".
>

I think that is what caught me off guard. I'm learning how to submit the
correctly formatted patch. I would very much like to disconnect the patch
From: from the email From: line.


> On the question of the actual patch, I'll try to get to it
> if Gerd doesn't first (though I have a conference next week
> so it might be the week after). The main thing I need to chase
> down is whether it's OK to call usb_packet_addbuf() with a
> zero length or not.
>

Good catch. I have no problem modifying the patch with better logic for a
zero length packet.
Peter Maydell May 20, 2024, 5:04 p.m. UTC | #15
On Tue, 6 Feb 2024 at 13:25, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
>
> This changes the ohci validation to not assert if invalid
> data is fed to the ohci controller. The poc suggested in
> https://bugs.launchpad.net/qemu/+bug/1907042
> and then migrated to bug #303 does the following to
> feed it a SETUP pid and EndPt of 1:
>
>         uint32_t MaxPacket = 64;
>         uint32_t TDFormat = 0;
>         uint32_t Skip = 0;
>         uint32_t Speed = 0;
>         uint32_t Direction = 0;  /* #define OHCI_TD_DIR_SETUP 0 */
>         uint32_t EndPt = 1;
>         uint32_t FuncAddress = 0;
>         ed->attr = (MaxPacket << 16) | (TDFormat << 15) | (Skip << 14)
>                    | (Speed << 13) | (Direction << 11) | (EndPt << 7)
>                    | FuncAddress;
>         ed->tailp = /*TDQTailPntr= */ 0;
>         ed->headp = ((/*TDQHeadPntr= */ &td[0]) & 0xfffffff0)
>                    | (/* ToggleCarry= */ 0 << 1);
>         ed->next_ed = (/* NextED= */ 0 & 0xfffffff0)
>
> qemu-fuzz also caught the same issue in #1510. They are
> both fixed by this patch.
>
> The if (td.cbp > td.be) logic in ohci_service_td() causes an
> ohci_die(). My understanding of the OHCI spec 4.3.1.2
> Table 4-2 allows td.cbp to be one byte more than td.be to
> signal the buffer has zero length. The new check in qemu
> appears to have been added since qemu-4.2. This patch
> includes both fixes since they are located very close
> together.

For the "zero length buffer" case, do you have a more detailed
pointer to the bit of the spec that says that "cbp = be + 1" is a
valid way to specify a zero length buffer? Table 4-2 in the copy I
have says for CurrentBufferPointer "a value of 0 indicates
a zero-length data packet or that all bytes have been transferred",
and the sample host OS driver function QueueGeneralRequest()
later in the spec handles a 0 length packet by setting
  TD->HcTD.CBP = TD->HcTD.BE = 0;
(which our emulation's code does handle).

> @@ -936,8 +941,8 @@ static int ohci_service_td(OHCIState *ohci, struct
> ohci_ed *ed)
>          if ((td.cbp & 0xfffff000) != (td.be & 0xfffff000)) {
>              len = (td.be & 0xfff) + 0x1001 - (td.cbp & 0xfff);
>          } else {
> -            if (td.cbp > td.be) {
> -                trace_usb_ohci_iso_td_bad_cc_overrun(td.cbp, td.be);
> +            if (td.cbp > td.be + 1) {

I think this has an overflow if td.be is 0xffffffff.

> +                trace_usb_ohci_td_bad_buf(td.cbp, td.be);
>                  ohci_die(ohci);
>                  return 1;
>              }

(On the other hand having looked at the code I'm happy
now that having a len of 0 passed into usb_packet_addbuf()
is OK because we already do that for the "cbp = be = 0"
way of specifying a zero length packet.)

thanks
-- PMM
Cord Amfmgm May 20, 2024, 10:24 p.m. UTC | #16
On Mon, May 20, 2024 at 12:05 PM Peter Maydell <peter.maydell@linaro.org>
wrote:

> On Tue, 6 Feb 2024 at 13:25, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
> >
> > This changes the ohci validation to not assert if invalid
> > data is fed to the ohci controller. The poc suggested in
> > https://bugs.launchpad.net/qemu/+bug/1907042
> > and then migrated to bug #303 does the following to
> > feed it a SETUP pid and EndPt of 1:
> >
> >         uint32_t MaxPacket = 64;
> >         uint32_t TDFormat = 0;
> >         uint32_t Skip = 0;
> >         uint32_t Speed = 0;
> >         uint32_t Direction = 0;  /* #define OHCI_TD_DIR_SETUP 0 */
> >         uint32_t EndPt = 1;
> >         uint32_t FuncAddress = 0;
> >         ed->attr = (MaxPacket << 16) | (TDFormat << 15) | (Skip << 14)
> >                    | (Speed << 13) | (Direction << 11) | (EndPt << 7)
> >                    | FuncAddress;
> >         ed->tailp = /*TDQTailPntr= */ 0;
> >         ed->headp = ((/*TDQHeadPntr= */ &td[0]) & 0xfffffff0)
> >                    | (/* ToggleCarry= */ 0 << 1);
> >         ed->next_ed = (/* NextED= */ 0 & 0xfffffff0)
> >
> > qemu-fuzz also caught the same issue in #1510. They are
> > both fixed by this patch.
> >
> > The if (td.cbp > td.be) logic in ohci_service_td() causes an
> > ohci_die(). My understanding of the OHCI spec 4.3.1.2
> > Table 4-2 allows td.cbp to be one byte more than td.be to
> > signal the buffer has zero length. The new check in qemu
> > appears to have been added since qemu-4.2. This patch
> > includes both fixes since they are located very close
> > together.
>
> For the "zero length buffer" case, do you have a more detailed
> pointer to the bit of the spec that says that "cbp = be + 1" is a
> valid way to specify a zero length buffer? Table 4-2 in the copy I
> have says for CurrentBufferPointer "a value of 0 indicates
> a zero-length data packet or that all bytes have been transferred",
> and the sample host OS driver function QueueGeneralRequest()
> later in the spec handles a 0 length packet by setting
>   TD->HcTD.CBP = TD->HcTD.BE = 0;
> (which our emulation's code does handle).
>

Reading the spec carefully, a CBP set to 0 should always mean the
zero-length buffer case (or that all bytes have been transferred, so the
buffer is now zero-length - the same thing).

Table 4-2 is the correct reference, and this part is clear. It's the part
you quoted. "Contains the physical address of the next memory location that
will be accessed for transfer to/from the endpoint. A value of 0 indicates
a zero-length data packet or that all bytes have been transferred."

Table 4-2 has this additional nugget that may be confusingly worded, for
BufferEnd: "Contains physical address of the last byte in the buffer for
this TD"

As you say, QueueGeneralRequest() handles a 0 length packet by setting CBP
= BE = 0.

There's a little bit more right below Table 4-2 in section 4.3.1.3.1:

"The CurrentBufferPointer value in the General TD is the address of the
data buffer that will be used for a data packet transfer to/from the
endpoint addressed by the ED. When the transfer is completed without an
error of any kind, the Host Controller advances the value of
CurrentBufferPointer by the number of bytes transferred"

I'll put it in the context of an example buffer of length 8. Perhaps this
is the easiest answer about Table 4-2's BufferEnd definition...

char buf[8];
char * CurrentBufferPointer = &buf[0];
char * BufferEnd = &buf[7]; // "address of the last byte in the buffer"
// The OHCI Host Controller than advances CurrentBufferPointer like this:
CurrentBufferPointer += 8
// After the transfer:
// CurrentBufferPointer = &buf[8];
// BufferEnd = &buf[7];

And here's an example buffer of length 0 -- you probably already know what
I'm going to do here:

char buf[0];
char * CurrentBufferPointer = &buf[0];
char * BufferEnd = &buf[-1]; // "address of the last byte in the buffer"
// The OHCI Host Controller than advances CurrentBufferPointer like this:
CurrentBufferPointer += 0
// After the transfer:
// CurrentBufferPointer = &buf[0];
// BufferEnd = &buf[-1];


> > @@ -936,8 +941,8 @@ static int ohci_service_td(OHCIState *ohci, struct
> > ohci_ed *ed)
> >          if ((td.cbp & 0xfffff000) != (td.be & 0xfffff000)) {
> >              len = (td.be & 0xfff) + 0x1001 - (td.cbp & 0xfff);
> >          } else {
> > -            if (td.cbp > td.be) {
> > -                trace_usb_ohci_iso_td_bad_cc_overrun(td.cbp, td.be);
> > +            if (td.cbp > td.be + 1) {
>
> I think this has an overflow if td.be is 0xffffffff.
>

Opps, yes. I will submit a revised patch. Since this change is protected
inside a condition if (td.cbp && td.be) I plan to rewrite it as:

if (td.cbp - 1 > td.be) { // rely on td.cbp != 0


>
> > +                trace_usb_ohci_td_bad_buf(td.cbp, td.be);
> >                  ohci_die(ohci);
> >                  return 1;
> >              }
>
> (On the other hand having looked at the code I'm happy
> now that having a len of 0 passed into usb_packet_addbuf()
> is OK because we already do that for the "cbp = be = 0"
> way of specifying a zero length packet.)


I came to the same conclusion. The zero length packet is already being
passed into usb_packet_addbuf(), so this change should be ok.
Peter Maydell May 28, 2024, 2:03 p.m. UTC | #17
On Mon, 20 May 2024 at 23:24, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
> On Mon, May 20, 2024 at 12:05 PM Peter Maydell <peter.maydell@linaro.org> wrote:
>> For the "zero length buffer" case, do you have a more detailed
>> pointer to the bit of the spec that says that "cbp = be + 1" is a
>> valid way to specify a zero length buffer? Table 4-2 in the copy I
>> have says for CurrentBufferPointer "a value of 0 indicates
>> a zero-length data packet or that all bytes have been transferred",
>> and the sample host OS driver function QueueGeneralRequest()
>> later in the spec handles a 0 length packet by setting
>>   TD->HcTD.CBP = TD->HcTD.BE = 0;
>> (which our emulation's code does handle).
>
>
> Reading the spec carefully, a CBP set to 0 should always mean the zero-length buffer case (or that all bytes have been transferred, so the buffer is now zero-length - the same thing).

Yeah, I can see the argument for "we should treat all cbp == 0 as
zero-length buffer, not just cbp == be == 0".

> Table 4-2 is the correct reference, and this part is clear. It's the part you quoted. "Contains the physical address of the next memory location that will be accessed for transfer to/from the endpoint. A value of 0 indicates a zero-length data packet or that all bytes have been transferred."
>
> Table 4-2 has this additional nugget that may be confusingly worded, for BufferEnd: "Contains physical address of the last byte in the buffer for this TD"

Mmm, but for a zero length buffer there is no "last byte" to
have this be the physical address of.

> And here's an example buffer of length 0 -- you probably already know what I'm going to do here:
>
> char buf[0];
> char * CurrentBufferPointer = &buf[0];
> char * BufferEnd = &buf[-1]; // "address of the last byte in the buffer"
> // The OHCI Host Controller than advances CurrentBufferPointer like this: CurrentBufferPointer += 0
> // After the transfer:
> // CurrentBufferPointer = &buf[0];
> // BufferEnd = &buf[-1];

Right, but why do you think this is valid, rather than
being a guest software bug? My reading of the spec is that it's
pretty clear about how to say "zero length buffer", and this
isn't it.

Is there some real-world guest OS that programs the OHCI
controller this way that we're trying to accommodate?

thanks
-- PMM
Cord Amfmgm May 28, 2024, 3:37 p.m. UTC | #18
On Tue, May 28, 2024 at 9:03 AM Peter Maydell <peter.maydell@linaro.org>
wrote:

> On Mon, 20 May 2024 at 23:24, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
> > On Mon, May 20, 2024 at 12:05 PM Peter Maydell <peter.maydell@linaro.org>
> wrote:
> >> For the "zero length buffer" case, do you have a more detailed
> >> pointer to the bit of the spec that says that "cbp = be + 1" is a
> >> valid way to specify a zero length buffer? Table 4-2 in the copy I
> >> have says for CurrentBufferPointer "a value of 0 indicates
> >> a zero-length data packet or that all bytes have been transferred",
> >> and the sample host OS driver function QueueGeneralRequest()
> >> later in the spec handles a 0 length packet by setting
> >>   TD->HcTD.CBP = TD->HcTD.BE = 0;
> >> (which our emulation's code does handle).
> >
> >
> > Reading the spec carefully, a CBP set to 0 should always mean the
> zero-length buffer case (or that all bytes have been transferred, so the
> buffer is now zero-length - the same thing).
>
> Yeah, I can see the argument for "we should treat all cbp == 0 as
> zero-length buffer, not just cbp == be == 0".
>
> > Table 4-2 is the correct reference, and this part is clear. It's the
> part you quoted. "Contains the physical address of the next memory location
> that will be accessed for transfer to/from the endpoint. A value of 0
> indicates a zero-length data packet or that all bytes have been
> transferred."
> >
> > Table 4-2 has this additional nugget that may be confusingly worded, for
> BufferEnd: "Contains physical address of the last byte in the buffer for
> this TD"
>
> Mmm, but for a zero length buffer there is no "last byte" to
> have this be the physical address of.
>
> > And here's an example buffer of length 0 -- you probably already know
> what I'm going to do here:
> >
> > char buf[0];
> > char * CurrentBufferPointer = &buf[0];
> > char * BufferEnd = &buf[-1]; // "address of the last byte in the buffer"
> > // The OHCI Host Controller than advances CurrentBufferPointer like
> this: CurrentBufferPointer += 0
> > // After the transfer:
> > // CurrentBufferPointer = &buf[0];
> > // BufferEnd = &buf[-1];
>
> Right, but why do you think this is valid, rather than
> being a guest software bug? My reading of the spec is that it's
> pretty clear about how to say "zero length buffer", and this
> isn't it.
>
> Is there some real-world guest OS that programs the OHCI
> controller this way that we're trying to accommodate?
>

qemu versions 4.2 and before allowed this behavior.

I don't think it's valid to ask for a *popular* guest OS as a
proof-of-concept because I'm not an expert on those. The spec says "last
byte" which can (not "should" just "can") be interpreted as "cbp - 1".
Therefore, I raised this patch request to re-enable the previous qemu
behavior, in the sense of ""be conservative in what you do, be liberal in
what you accept from others" -
https://en.wikipedia.org/wiki/Robustness_principle

It is *not* valid to say the spec disallows "cbp - 1" to mean "empty
buffer." That is what I am arguing :)


>
> thanks
> -- PMM
>
Peter Maydell May 28, 2024, 4:32 p.m. UTC | #19
On Tue, 28 May 2024 at 16:37, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
>
>
>
> On Tue, May 28, 2024 at 9:03 AM Peter Maydell <peter.maydell@linaro.org> wrote:
>>
>> On Mon, 20 May 2024 at 23:24, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
>> > On Mon, May 20, 2024 at 12:05 PM Peter Maydell <peter.maydell@linaro.org> wrote:
>> >> For the "zero length buffer" case, do you have a more detailed
>> >> pointer to the bit of the spec that says that "cbp = be + 1" is a
>> >> valid way to specify a zero length buffer? Table 4-2 in the copy I
>> >> have says for CurrentBufferPointer "a value of 0 indicates
>> >> a zero-length data packet or that all bytes have been transferred",
>> >> and the sample host OS driver function QueueGeneralRequest()
>> >> later in the spec handles a 0 length packet by setting
>> >>   TD->HcTD.CBP = TD->HcTD.BE = 0;
>> >> (which our emulation's code does handle).
>> >
>> >
>> > Reading the spec carefully, a CBP set to 0 should always mean the zero-length buffer case (or that all bytes have been transferred, so the buffer is now zero-length - the same thing).
>>
>> Yeah, I can see the argument for "we should treat all cbp == 0 as
>> zero-length buffer, not just cbp == be == 0".
>>
>> > Table 4-2 is the correct reference, and this part is clear. It's the part you quoted. "Contains the physical address of the next memory location that will be accessed for transfer to/from the endpoint. A value of 0 indicates a zero-length data packet or that all bytes have been transferred."
>> >
>> > Table 4-2 has this additional nugget that may be confusingly worded, for BufferEnd: "Contains physical address of the last byte in the buffer for this TD"
>>
>> Mmm, but for a zero length buffer there is no "last byte" to
>> have this be the physical address of.
>>
>> > And here's an example buffer of length 0 -- you probably already know what I'm going to do here:
>> >
>> > char buf[0];
>> > char * CurrentBufferPointer = &buf[0];
>> > char * BufferEnd = &buf[-1]; // "address of the last byte in the buffer"
>> > // The OHCI Host Controller than advances CurrentBufferPointer like this: CurrentBufferPointer += 0
>> > // After the transfer:
>> > // CurrentBufferPointer = &buf[0];
>> > // BufferEnd = &buf[-1];
>>
>> Right, but why do you think this is valid, rather than
>> being a guest software bug? My reading of the spec is that it's
>> pretty clear about how to say "zero length buffer", and this
>> isn't it.
>>
>> Is there some real-world guest OS that programs the OHCI
>> controller this way that we're trying to accommodate?
>
>
> qemu versions 4.2 and before allowed this behavior.

So? That might just mean we had a bug and we fixed it.
4.2 is a very old version of QEMU and nobody seems to have
complained in the four years since we released 5.0 about this,
which suggests that generally guest OS drivers don't try
to send zero-length buffers in this way.

> I don't think it's valid to ask for a *popular* guest OS as a proof-of-concept because I'm not an expert on those.

I didn't ask for "popular"; I asked for "real-world".
What is the actual guest code you're running that falls over
because of the behaviour change?

More generally, why do you want this behaviour to be
changed? Reasonable reasons might include:
 * we're out of spec based on reading the documentation
 * you're trying to run some old Windows VM/QNX/etc image,
   and it doesn't work any more
 * all the real hardware we tested behaves this way

But don't necessarily include:
 * something somebody wrote and only tested on QEMU happens to
   assume the old behaviour rather than following the hw spec

QEMU occasionally works around guest OS bugs, but only as
when we really have to. It's usually better to fix the
bug in the guest.

thanks
-- PMM
Cord Amfmgm May 30, 2024, 4:54 a.m. UTC | #20
On Tue, May 28, 2024 at 11:32 AM Peter Maydell <peter.maydell@linaro.org>
wrote:

> On Tue, 28 May 2024 at 16:37, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
> >
> >
> >
> > On Tue, May 28, 2024 at 9:03 AM Peter Maydell <peter.maydell@linaro.org>
> wrote:
> >>
> >> On Mon, 20 May 2024 at 23:24, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
> >> > On Mon, May 20, 2024 at 12:05 PM Peter Maydell <
> peter.maydell@linaro.org> wrote:
> >> >> For the "zero length buffer" case, do you have a more detailed
> >> >> pointer to the bit of the spec that says that "cbp = be + 1" is a
> >> >> valid way to specify a zero length buffer? Table 4-2 in the copy I
> >> >> have says for CurrentBufferPointer "a value of 0 indicates
> >> >> a zero-length data packet or that all bytes have been transferred",
> >> >> and the sample host OS driver function QueueGeneralRequest()
> >> >> later in the spec handles a 0 length packet by setting
> >> >>   TD->HcTD.CBP = TD->HcTD.BE = 0;
> >> >> (which our emulation's code does handle).
> >> >
> >> >
> >> > Reading the spec carefully, a CBP set to 0 should always mean the
> zero-length buffer case (or that all bytes have been transferred, so the
> buffer is now zero-length - the same thing).
> >>
> >> Yeah, I can see the argument for "we should treat all cbp == 0 as
> >> zero-length buffer, not just cbp == be == 0".
> >>
> >> > Table 4-2 is the correct reference, and this part is clear. It's the
> part you quoted. "Contains the physical address of the next memory location
> that will be accessed for transfer to/from the endpoint. A value of 0
> indicates a zero-length data packet or that all bytes have been
> transferred."
> >> >
> >> > Table 4-2 has this additional nugget that may be confusingly worded,
> for BufferEnd: "Contains physical address of the last byte in the buffer
> for this TD"
> >>
> >> Mmm, but for a zero length buffer there is no "last byte" to
> >> have this be the physical address of.
> >>
> >> > And here's an example buffer of length 0 -- you probably already know
> what I'm going to do here:
> >> >
> >> > char buf[0];
> >> > char * CurrentBufferPointer = &buf[0];
> >> > char * BufferEnd = &buf[-1]; // "address of the last byte in the
> buffer"
> >> > // The OHCI Host Controller than advances CurrentBufferPointer like
> this: CurrentBufferPointer += 0
> >> > // After the transfer:
> >> > // CurrentBufferPointer = &buf[0];
> >> > // BufferEnd = &buf[-1];
> >>
> >> Right, but why do you think this is valid, rather than
> >> being a guest software bug? My reading of the spec is that it's
> >> pretty clear about how to say "zero length buffer", and this
> >> isn't it.
> >>
> >> Is there some real-world guest OS that programs the OHCI
> >> controller this way that we're trying to accommodate?
> >
> >
> > qemu versions 4.2 and before allowed this behavior.
>
> So? That might just mean we had a bug and we fixed it.
> 4.2 is a very old version of QEMU and nobody seems to have
> complained in the four years since we released 5.0 about this,
> which suggests that generally guest OS drivers don't try
> to send zero-length buffers in this way.
>
> > I don't think it's valid to ask for a *popular* guest OS as a
> proof-of-concept because I'm not an expert on those.
>
> I didn't ask for "popular"; I asked for "real-world".
> What is the actual guest code you're running that falls over
> because of the behaviour change?
>
> More generally, why do you want this behaviour to be
> changed? Reasonable reasons might include:
>  * we're out of spec based on reading the documentation
>  * you're trying to run some old Windows VM/QNX/etc image,
>    and it doesn't work any more
>  * all the real hardware we tested behaves this way
>
> But don't necessarily include:
>  * something somebody wrote and only tested on QEMU happens to
>    assume the old behaviour rather than following the hw spec
>
> QEMU occasionally works around guest OS bugs, but only as
> when we really have to. It's usually better to fix the
> bug in the guest.
>

It's not, and I've already demonstrated that real hardware is consistent
with the fix in this patch.

Please check your tone.


>
> thanks
> -- PMM
>
Alex Bennée May 30, 2024, 8:33 a.m. UTC | #21
Cord Amfmgm <dmamfmgm@gmail.com> writes:

> On Tue, May 28, 2024 at 11:32 AM Peter Maydell <peter.maydell@linaro.org> wrote:
>
>  On Tue, 28 May 2024 at 16:37, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
>  >
>  > On Tue, May 28, 2024 at 9:03 AM Peter Maydell <peter.maydell@linaro.org> wrote:
>  >>
>  >> On Mon, 20 May 2024 at 23:24, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
>  >> > On Mon, May 20, 2024 at 12:05 PM Peter Maydell <peter.maydell@linaro.org> wrote:
<snip>
>  >> > And here's an example buffer of length 0 -- you probably already know what I'm going to do here:
>  >> >
>  >> > char buf[0];
>  >> > char * CurrentBufferPointer = &buf[0];
>  >> > char * BufferEnd = &buf[-1]; // "address of the last byte in the buffer"
>  >> > // The OHCI Host Controller than advances CurrentBufferPointer like this: CurrentBufferPointer += 0
>  >> > // After the transfer:
>  >> > // CurrentBufferPointer = &buf[0];
>  >> > // BufferEnd = &buf[-1];
>  >>
>  >> Right, but why do you think this is valid, rather than
>  >> being a guest software bug? My reading of the spec is that it's
>  >> pretty clear about how to say "zero length buffer", and this
>  >> isn't it.
>  >>
>  >> Is there some real-world guest OS that programs the OHCI
>  >> controller this way that we're trying to accommodate?
>  >
>  >
>  > qemu versions 4.2 and before allowed this behavior.
>
>  So? That might just mean we had a bug and we fixed it.
>  4.2 is a very old version of QEMU and nobody seems to have
>  complained in the four years since we released 5.0 about this,
>  which suggests that generally guest OS drivers don't try
>  to send zero-length buffers in this way.
>
>  > I don't think it's valid to ask for a *popular* guest OS as a proof-of-concept because I'm not an expert on those.
>
>  I didn't ask for "popular"; I asked for "real-world".
>  What is the actual guest code you're running that falls over
>  because of the behaviour change?
>
>  More generally, why do you want this behaviour to be
>  changed? Reasonable reasons might include:
>   * we're out of spec based on reading the documentation
>   * you're trying to run some old Windows VM/QNX/etc image,
>     and it doesn't work any more
>   * all the real hardware we tested behaves this way
>
>  But don't necessarily include:
>   * something somebody wrote and only tested on QEMU happens to
>     assume the old behaviour rather than following the hw spec
>
>  QEMU occasionally works around guest OS bugs, but only as
>  when we really have to. It's usually better to fix the
>  bug in the guest.
>
> It's not, and I've already demonstrated that real hardware is consistent with the fix in this patch.
>
> Please check your tone.

I don't think that is a particularly helpful comment for someone who is
taking the time to review your patches. Reading through the thread I
didn't see anything that said this is how real HW behaves but I may well
have missed it. However you have a number of review comments to address
so I suggest you spin a v2 of the series to address them and outline the
reason to accept an out of spec transaction.
Cord Amfmgm May 30, 2024, 4:03 p.m. UTC | #22
On Thu, May 30, 2024 at 3:33 AM Alex Bennée <alex.bennee@linaro.org> wrote:

> Cord Amfmgm <dmamfmgm@gmail.com> writes:
>
> > On Tue, May 28, 2024 at 11:32 AM Peter Maydell <peter.maydell@linaro.org>
> wrote:
> >
> >  On Tue, 28 May 2024 at 16:37, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
> >  >
> >  > On Tue, May 28, 2024 at 9:03 AM Peter Maydell <
> peter.maydell@linaro.org> wrote:
> >  >>
> >  >> On Mon, 20 May 2024 at 23:24, Cord Amfmgm <dmamfmgm@gmail.com>
> wrote:
> >  >> > On Mon, May 20, 2024 at 12:05 PM Peter Maydell <
> peter.maydell@linaro.org> wrote:
> <snip>
> >  >> > And here's an example buffer of length 0 -- you probably already
> know what I'm going to do here:
> >  >> >
> >  >> > char buf[0];
> >  >> > char * CurrentBufferPointer = &buf[0];
> >  >> > char * BufferEnd = &buf[-1]; // "address of the last byte in the
> buffer"
> >  >> > // The OHCI Host Controller than advances CurrentBufferPointer
> like this: CurrentBufferPointer += 0
> >  >> > // After the transfer:
> >  >> > // CurrentBufferPointer = &buf[0];
> >  >> > // BufferEnd = &buf[-1];
> >  >>
> >  >> Right, but why do you think this is valid, rather than
> >  >> being a guest software bug? My reading of the spec is that it's
> >  >> pretty clear about how to say "zero length buffer", and this
> >  >> isn't it.
> >  >>
> >  >> Is there some real-world guest OS that programs the OHCI
> >  >> controller this way that we're trying to accommodate?
> >  >
> >  >
> >  > qemu versions 4.2 and before allowed this behavior.
> >
> >  So? That might just mean we had a bug and we fixed it.
> >  4.2 is a very old version of QEMU and nobody seems to have
> >  complained in the four years since we released 5.0 about this,
> >  which suggests that generally guest OS drivers don't try
> >  to send zero-length buffers in this way.
> >
> >  > I don't think it's valid to ask for a *popular* guest OS as a
> proof-of-concept because I'm not an expert on those.
> >
> >  I didn't ask for "popular"; I asked for "real-world".
> >  What is the actual guest code you're running that falls over
> >  because of the behaviour change?
> >
> >  More generally, why do you want this behaviour to be
> >  changed? Reasonable reasons might include:
> >   * we're out of spec based on reading the documentation
> >   * you're trying to run some old Windows VM/QNX/etc image,
> >     and it doesn't work any more
> >   * all the real hardware we tested behaves this way
> >
> >  But don't necessarily include:
> >   * something somebody wrote and only tested on QEMU happens to
> >     assume the old behaviour rather than following the hw spec
> >
> >  QEMU occasionally works around guest OS bugs, but only as
> >  when we really have to. It's usually better to fix the
> >  bug in the guest.
> >
> > It's not, and I've already demonstrated that real hardware is consistent
> with the fix in this patch.
> >
> > Please check your tone.
>
> I don't think that is a particularly helpful comment for someone who is
> taking the time to review your patches. Reading through the thread I
> didn't see anything that said this is how real HW behaves but I may well
> have missed it. However you have a number of review comments to address
> so I suggest you spin a v2 of the series to address them and outline the
> reason to accept an out of spec transaction.
>
>
I did a rework of the patch -- see my email from May 20, quoted below --
and I was under the impression it addressed all the review comments. Did I
miss something? I apologize if I did.

> index acd6016980..71b54914d3 100644
> --- a/hw/usb/hcd-ohci.c
> +++ b/hw/usb/hcd-ohci.c
> @@ -941,8 +941,8 @@ static int ohci_service_td(OHCIState *ohci, struct
ohci_ed *ed)
>          if ((td.cbp & 0xfffff000) != (td.be & 0xfffff000)) {
>              len = (td.be & 0xfff) + 0x1001 - (td.cbp & 0xfff);
>          } else {
> -            if (td.cbp > td.be) {
> -                trace_usb_ohci_iso_td_bad_cc_overrun(td.cbp, td.be);
> +            if (td.cbp - 1 > td.be) {  /* rely on td.cbp != 0 */


> Reading through the thread I didn't see anything that said this is how
real HW behaves but I may well have missed it.

This is what I wrote regarding real HW:

Results are:

 qemu 4.2   | qemu HEAD  | actual HW
------------+------------+------------
 works fine | ohci_die() | works fine

Would additional verification of the actual HW be useful?

Peter posted the following which is more specific than "qemu 4.2" -- I
agree this is most likely the qemu commit where this thread is focused:

> Almost certainly this was commit 1328fe0c32d54 ("hw: usb: hcd-ohci:
> check len and frame_number variables"), which added these bounds
> checks. Prior to that we did no bounds checking at all, which
> meant that we permitted cbp=be+1 to mean a zero length, but also
> that we permitted the guest to overrun host-side buffers by
> specifying completely bogus cbp and be values. The timeframe is
> more or less right (2020), at least.
>
> -- PMM

Where does the conversation go from here? I'm under the impression I have
provided objective answers to all the questions and resolved all review
comments on the code. I receive the feedback that I missed something -
please restate the question?
Alex Bennée May 30, 2024, 7:12 p.m. UTC | #23
Cord Amfmgm <dmamfmgm@gmail.com> writes:

> On Thu, May 30, 2024 at 3:33 AM Alex Bennée <alex.bennee@linaro.org> wrote:
>
>  Cord Amfmgm <dmamfmgm@gmail.com> writes:
>
>  > On Tue, May 28, 2024 at 11:32 AM Peter Maydell <peter.maydell@linaro.org> wrote:
>  >
>  >  On Tue, 28 May 2024 at 16:37, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
>  >  >
>  >  > On Tue, May 28, 2024 at 9:03 AM Peter Maydell <peter.maydell@linaro.org> wrote:
>  >  >>
>  >  >> On Mon, 20 May 2024 at 23:24, Cord Amfmgm <dmamfmgm@gmail.com> wrote:
>  >  >> > On Mon, May 20, 2024 at 12:05 PM Peter Maydell <peter.maydell@linaro.org> wrote:
>  <snip>
>  >  >> > And here's an example buffer of length 0 -- you probably already know what I'm going to do here:
>  >  >> >
>  >  >> > char buf[0];
>  >  >> > char * CurrentBufferPointer = &buf[0];
>  >  >> > char * BufferEnd = &buf[-1]; // "address of the last byte in the buffer"
>  >  >> > // The OHCI Host Controller than advances CurrentBufferPointer like this: CurrentBufferPointer += 0
>  >  >> > // After the transfer:
>  >  >> > // CurrentBufferPointer = &buf[0];
>  >  >> > // BufferEnd = &buf[-1];
>  >  >>
>  >  >> Right, but why do you think this is valid, rather than
>  >  >> being a guest software bug? My reading of the spec is that it's
>  >  >> pretty clear about how to say "zero length buffer", and this
>  >  >> isn't it.
>  >  >>
>  >  >> Is there some real-world guest OS that programs the OHCI
>  >  >> controller this way that we're trying to accommodate?
>  >  >
>  >  >
>  >  > qemu versions 4.2 and before allowed this behavior.
>  >
>  >  So? That might just mean we had a bug and we fixed it.
>  >  4.2 is a very old version of QEMU and nobody seems to have
>  >  complained in the four years since we released 5.0 about this,
>  >  which suggests that generally guest OS drivers don't try
>  >  to send zero-length buffers in this way.
>  >
>  >  > I don't think it's valid to ask for a *popular* guest OS as a proof-of-concept because I'm not an expert on those.
>  >
>  >  I didn't ask for "popular"; I asked for "real-world".
>  >  What is the actual guest code you're running that falls over
>  >  because of the behaviour change?
>  >
>  >  More generally, why do you want this behaviour to be
>  >  changed? Reasonable reasons might include:
>  >   * we're out of spec based on reading the documentation
>  >   * you're trying to run some old Windows VM/QNX/etc image,
>  >     and it doesn't work any more
>  >   * all the real hardware we tested behaves this way
>  >
>  >  But don't necessarily include:
>  >   * something somebody wrote and only tested on QEMU happens to
>  >     assume the old behaviour rather than following the hw spec
>  >
>  >  QEMU occasionally works around guest OS bugs, but only as
>  >  when we really have to. It's usually better to fix the
>  >  bug in the guest.
>  >
>  > It's not, and I've already demonstrated that real hardware is consistent with the fix in this patch.
>  >
>  > Please check your tone.
>
>  I don't think that is a particularly helpful comment for someone who is
>  taking the time to review your patches. Reading through the thread I
>  didn't see anything that said this is how real HW behaves but I may well
>  have missed it. However you have a number of review comments to address
>  so I suggest you spin a v2 of the series to address them and outline the
>  reason to accept an out of spec transaction.
>
> I did a rework of the patch -- see my email from May 20, quoted below -- and I was under the impression it addressed all the
> review comments. Did I miss something? I apologize if I did.

Ahh I see - I'd only seen this thread continue so wasn't aware a new
version had been posted. For future patches consider using -vN when
sending them so we can clearly see a new revision is available.

>
>> index acd6016980..71b54914d3 100644
>> --- a/hw/usb/hcd-ohci.c
>> +++ b/hw/usb/hcd-ohci.c
>> @@ -941,8 +941,8 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed)
>>          if ((td.cbp & 0xfffff000) != (td.be & 0xfffff000)) {
>>              len = (td.be & 0xfff) + 0x1001 - (td.cbp & 0xfff);
>>          } else {
>> -            if (td.cbp > td.be) {
>> -                trace_usb_ohci_iso_td_bad_cc_overrun(td.cbp, td.be);
>> +            if (td.cbp - 1 > td.be) {  /* rely on td.cbp != 0 */
>
>> Reading through the thread I didn't see anything that said this is how real HW behaves but I may well have missed it.
>
> This is what I wrote regarding real HW:
>
> Results are:
>
>  qemu 4.2   | qemu HEAD  | actual HW
> ------------+------------+------------
>  works fine | ohci_die() | works fine
>
> Would additional verification of the actual HW be useful?
>
> Peter posted the following which is more specific than "qemu 4.2" -- I agree this is most likely the qemu commit where this
> thread is focused:
>
>> Almost certainly this was commit 1328fe0c32d54 ("hw: usb: hcd-ohci:
>> check len and frame_number variables"), which added these bounds
>> checks. Prior to that we did no bounds checking at all, which
>> meant that we permitted cbp=be+1 to mean a zero length, but also
>> that we permitted the guest to overrun host-side buffers by
>> specifying completely bogus cbp and be values. The timeframe is
>> more or less right (2020), at least.
>> 
>> -- PMM
>
> Where does the conversation go from here? I'm under the impression I have provided objective answers to all the questions
> and resolved all review comments on the code. I receive the feedback
> that I missed something - please restate the question?

I can see patch 1/2 has been queued and 2/2 is still outstanding. I'm
having trouble finding the referenced entry in the OHCI spec. The only
one I can see is Release 1.1, January 6th, 2000 and that doesn't have a
section 4.3.1.2.

I think discussion should continue on that thread.
Cord Amfmgm May 30, 2024, 9:11 p.m. UTC | #24
On Thu, May 30, 2024 at 2:12 PM Alex Bennée <alex.bennee@linaro.org> wrote:

> Cord Amfmgm <dmamfmgm@gmail.com> writes:
>
> > On Thu, May 30, 2024 at 3:33 AM Alex Bennée <alex.bennee@linaro.org>
> wrote:
> >
> >  Cord Amfmgm <dmamfmgm@gmail.com> writes:
> >
> >  > On Tue, May 28, 2024 at 11:32 AM Peter Maydell <
> peter.maydell@linaro.org> wrote:
> >  >
> >  >  On Tue, 28 May 2024 at 16:37, Cord Amfmgm <dmamfmgm@gmail.com>
> wrote:
> >  >  >
> >  >  > On Tue, May 28, 2024 at 9:03 AM Peter Maydell <
> peter.maydell@linaro.org> wrote:
> >  >  >>
> >  >  >> On Mon, 20 May 2024 at 23:24, Cord Amfmgm <dmamfmgm@gmail.com>
> wrote:
> >  >  >> > On Mon, May 20, 2024 at 12:05 PM Peter Maydell <
> peter.maydell@linaro.org> wrote:
> >  <snip>
> >  >  >> > And here's an example buffer of length 0 -- you probably
> already know what I'm going to do here:
> >  >  >> >
> >  >  >> > char buf[0];
> >  >  >> > char * CurrentBufferPointer = &buf[0];
> >  >  >> > char * BufferEnd = &buf[-1]; // "address of the last byte in
> the buffer"
> >  >  >> > // The OHCI Host Controller than advances CurrentBufferPointer
> like this: CurrentBufferPointer += 0
> >  >  >> > // After the transfer:
> >  >  >> > // CurrentBufferPointer = &buf[0];
> >  >  >> > // BufferEnd = &buf[-1];
> >  >  >>
> >  >  >> Right, but why do you think this is valid, rather than
> >  >  >> being a guest software bug? My reading of the spec is that it's
> >  >  >> pretty clear about how to say "zero length buffer", and this
> >  >  >> isn't it.
> >  >  >>
> >  >  >> Is there some real-world guest OS that programs the OHCI
> >  >  >> controller this way that we're trying to accommodate?
> >  >  >
> >  >  >
> >  >  > qemu versions 4.2 and before allowed this behavior.
> >  >
> >  >  So? That might just mean we had a bug and we fixed it.
> >  >  4.2 is a very old version of QEMU and nobody seems to have
> >  >  complained in the four years since we released 5.0 about this,
> >  >  which suggests that generally guest OS drivers don't try
> >  >  to send zero-length buffers in this way.
> >  >
> >  >  > I don't think it's valid to ask for a *popular* guest OS as a
> proof-of-concept because I'm not an expert on those.
> >  >
> >  >  I didn't ask for "popular"; I asked for "real-world".
> >  >  What is the actual guest code you're running that falls over
> >  >  because of the behaviour change?
> >  >
> >  >  More generally, why do you want this behaviour to be
> >  >  changed? Reasonable reasons might include:
> >  >   * we're out of spec based on reading the documentation
> >  >   * you're trying to run some old Windows VM/QNX/etc image,
> >  >     and it doesn't work any more
> >  >   * all the real hardware we tested behaves this way
> >  >
> >  >  But don't necessarily include:
> >  >   * something somebody wrote and only tested on QEMU happens to
> >  >     assume the old behaviour rather than following the hw spec
> >  >
> >  >  QEMU occasionally works around guest OS bugs, but only as
> >  >  when we really have to. It's usually better to fix the
> >  >  bug in the guest.
> >  >
> >  > It's not, and I've already demonstrated that real hardware is
> consistent with the fix in this patch.
> >  >
> >  > Please check your tone.
> >
> >  I don't think that is a particularly helpful comment for someone who is
> >  taking the time to review your patches. Reading through the thread I
> >  didn't see anything that said this is how real HW behaves but I may well
> >  have missed it. However you have a number of review comments to address
> >  so I suggest you spin a v2 of the series to address them and outline the
> >  reason to accept an out of spec transaction.
> >
> > I did a rework of the patch -- see my email from May 20, quoted below --
> and I was under the impression it addressed all the
> > review comments. Did I miss something? I apologize if I did.
>
> Ahh I see - I'd only seen this thread continue so wasn't aware a new
> version had been posted. For future patches consider using -vN when
> sending them so we can clearly see a new revision is available.
>
> >
> >> index acd6016980..71b54914d3 100644
> >> --- a/hw/usb/hcd-ohci.c
> >> +++ b/hw/usb/hcd-ohci.c
> >> @@ -941,8 +941,8 @@ static int ohci_service_td(OHCIState *ohci, struct
> ohci_ed *ed)
> >>          if ((td.cbp & 0xfffff000) != (td.be & 0xfffff000)) {
> >>              len = (td.be & 0xfff) + 0x1001 - (td.cbp & 0xfff);
> >>          } else {
> >> -            if (td.cbp > td.be) {
> >> -                trace_usb_ohci_iso_td_bad_cc_overrun(td.cbp, td.be);
> >> +            if (td.cbp - 1 > td.be) {  /* rely on td.cbp != 0 */
> >
> >> Reading through the thread I didn't see anything that said this is how
> real HW behaves but I may well have missed it.
> >
> > This is what I wrote regarding real HW:
> >
> > Results are:
> >
> >  qemu 4.2   | qemu HEAD  | actual HW
> > ------------+------------+------------
> >  works fine | ohci_die() | works fine
> >
> > Would additional verification of the actual HW be useful?
> >
> > Peter posted the following which is more specific than "qemu 4.2" -- I
> agree this is most likely the qemu commit where this
> > thread is focused:
> >
> >> Almost certainly this was commit 1328fe0c32d54 ("hw: usb: hcd-ohci:
> >> check len and frame_number variables"), which added these bounds
> >> checks. Prior to that we did no bounds checking at all, which
> >> meant that we permitted cbp=be+1 to mean a zero length, but also
> >> that we permitted the guest to overrun host-side buffers by
> >> specifying completely bogus cbp and be values. The timeframe is
> >> more or less right (2020), at least.
> >>
> >> -- PMM
> >
> > Where does the conversation go from here? I'm under the impression I
> have provided objective answers to all the questions
> > and resolved all review comments on the code. I receive the feedback
> > that I missed something - please restate the question?
>
> I can see patch 1/2 has been queued and 2/2 is still outstanding. I'm
> having trouble finding the referenced entry in the OHCI spec. The only
> one I can see is Release 1.1, January 6th, 2000 and that doesn't have a
> section 4.3.1.2.
>
> I think discussion should continue on that thread.
>

Yes, agreed.


>
> --
> Alex Bennée
> Virtualisation Tech Lead @ Linaro
>
diff mbox series

Patch

diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index d73b53f33c..a53808126f 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -927,6 +927,11 @@  static int ohci_service_td(OHCIState *ohci,
struct ohci_ed *ed)
     case OHCI_TD_DIR_SETUP:
         str = "setup";
         pid = USB_TOKEN_SETUP;
+        if (OHCI_BM(ed->flags, ED_EN) > 0) {  /* setup only allowed to ep 0 */
+            trace_usb_ohci_td_bad_pid(str, ed->flags, td.flags);
+            ohci_die(ohci);
+            return 1;
+        }
         break;
     default:
         trace_usb_ohci_td_bad_direction(dir);
@@ -936,8 +941,8 @@  static int ohci_service_td(OHCIState *ohci, struct
ohci_ed *ed)
         if ((td.cbp & 0xfffff000) != (td.be & 0xfffff000)) {
             len = (td.be & 0xfff) + 0x1001 - (td.cbp & 0xfff);
         } else {
-            if (td.cbp > td.be) {
-                trace_usb_ohci_iso_td_bad_cc_overrun(td.cbp, td.be);
+            if (td.cbp > td.be + 1) {
+                trace_usb_ohci_td_bad_buf(td.cbp, td.be);
                 ohci_die(ohci);
                 return 1;
             }
diff --git a/hw/usb/trace-events b/hw/usb/trace-events
index ed7dc210d3..b47d082fa3 100644
--- a/hw/usb/trace-events
+++ b/hw/usb/trace-events
@@ -28,6 +28,8 @@  usb_ohci_iso_td_data_overrun(int ret, ssize_t len)
"DataOverrun %d > %zu"
 usb_ohci_iso_td_data_underrun(int ret) "DataUnderrun %d"
 usb_ohci_iso_td_nak(int ret) "got NAK/STALL %d"
 usb_ohci_iso_td_bad_response(int ret) "Bad device response %d"
+usb_ohci_td_bad_buf(uint32_t cbp, uint32_t be) "Bad cbp = 0x%x > be = 0x%x"
+usb_ohci_td_bad_pid(const char *s, uint32_t edf, uint32_t tdf) "Bad
pid %s: ed.flags 0x%x td.flags 0x%x"
 usb_ohci_port_attach(int index) "port #%d"