Message ID | 1530811543-6881-11-git-send-email-jjherne@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | s390: vfio-ccw dasd ipl support | expand |
On 07/05/2018 07:25 PM, Jason J. Herne wrote: > From: "Jason J. Herne" <jjherne@linux.vnet.ibm.com> > > Add struct for format-0 ccws. Support executing format-0 channel > programs and waiting for their completion before continuing execution. > This will be used for real dasd ipl. > > Add cu_type() to channel io library. This will be used to query control > unit type which is used to determine if we are booting a virtio device or a > real dasd device. > > Signed-off-by: Jason J. Herne <jjherne@linux.vnet.ibm.com> > Signed-off-by: Jason J. Herne <jjherne@linux.ibm.com> > --- > pc-bios/s390-ccw/cio.c | 127 +++++++++++++++++++++++++++++++++++++++++++++++++ > pc-bios/s390-ccw/cio.h | 25 +++++++++- > 2 files changed, 151 insertions(+), 1 deletion(-) > > diff --git a/pc-bios/s390-ccw/cio.c b/pc-bios/s390-ccw/cio.c > index 095f79b..f440380 100644 > --- a/pc-bios/s390-ccw/cio.c > +++ b/pc-bios/s390-ccw/cio.c > @@ -10,6 +10,7 @@ > > #include "libc.h" > #include "s390-ccw.h" > +#include "s390-arch.h" > #include "cio.h" > > static char chsc_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); > @@ -39,3 +40,129 @@ void enable_subchannel(SubChannelId schid) > schib.pmcw.ena = 1; > msch(schid, &schib); > } > + > +__u16 cu_type(SubChannelId schid) > +{ > + Ccw1 senseIdCcw; > + SenseId senseData; > + > + senseIdCcw.cmd_code = CCW_CMD_SENSE_ID; > + senseIdCcw.cda = ptr2u32(&senseData); > + senseIdCcw.count = sizeof(senseData); > + > + if (do_cio(schid, ptr2u32(&senseIdCcw), CCW_FMT1)) { > + panic("Failed to run SenseID CCw\n"); > + } > + > + return senseData.cu_type; > +} > + > +static bool irb_error(Irb *irb) > +{ > + /* We have to ignore Incorrect Length (cstat == 0x40) indicators because > + * real devices expect a 24 byte SenseID buffer, and virtio devices expect > + * a much larger buffer. Neither device type can tolerate a buffer size > + * different from what they expect so they set this indicator. > + */ > + if (irb->scsw.cstat != 0x00 && irb->scsw.cstat != 0x40) { > + return true; > + } > + return irb->scsw.dstat != 0xc; > +} > + > +/* Executes a channel program at a given subchannel. The request to run the > + * channel program is sent to the subchannel, we then wait for the interrupt > + * singaling completion of the I/O operation(s) perfomed by the channel > + * program. Lastly we verify that the i/o operation completed without error and > + * that the interrupt we received was for the subchannel used to run the > + * channel program. > + * > + * Note: This function assumes it is running in an environment where no other > + * cpus are generating or receiving I/O interrupts. So either run it in a > + * single-cpu environment or make sure all other cpus are not doing I/O and > + * have I/O interrupts masked off. > + */ > +int do_cio(SubChannelId schid, uint32_t ccw_addr, int fmt) > +{ > + Ccw0 *this_ccw, *prev_ccw; > + CmdOrb orb = {}; > + Irb irb = {}; > + int rc; > + > + IPL_assert(fmt == 0 || fmt == 1, "Invalid ccw format"); > + > + /* ccw_addr must be <= 24 bits and point to at least one whole ccw. */ > + if (fmt == 0) { > + IPL_assert(ccw_addr <= 0xFFFFFF - 8, "Invalid ccw address"); > + } > + > + orb.fmt = fmt ; > + orb.pfch = 1; /* QEMU's cio implementation requires prefetch */ > + orb.c64 = 1; /* QEMU's cio implementation requires 64-bit idaws */ > + orb.lpm = 0xFF; /* All paths allowed */ > + orb.cpa = ccw_addr; > + > + rc = ssch(schid, &orb); > + if (rc) { > + print_int("ssch failed with rc=", rc); > + return rc; > + } > + > + await_io_int(schid.sch_no); > + > + /* Clear read */ > + rc = tsch(schid, &irb); > + if (rc) { > + print_int("tsch failed with rc=", rc); > + return rc; > + } > + > + if (irb_error(&irb)) { > + this_ccw = u32toptr(irb.scsw.cpa); > + prev_ccw = u32toptr(irb.scsw.cpa - 8); > + > + print_int("irb_error: cstat=", irb.scsw.cstat); > + print_int(" dstat=", irb.scsw.dstat); > + print_int(" cpa=", irb.scsw.cpa); > + print_int(" prev_ccw=", *((uint64_t *)prev_ccw)); > + print_int(" this_ccw=", *((uint64_t *)this_ccw)); > + } > + > + return 0; > +} > + > +void await_io_int(uint16_t sch_no) > +{ > + /* > + * wait_psw and ctl6 must be static to avoid stack allocation as gcc cannot > + * align stack variables. The stctg, lctlg and lpswe instructions require > + * that their operands be aligned on an 8-byte boundary. > + */ > + static uint64_t ctl6 __attribute__((__aligned__(8))); > + static PSW wait_psw; > + > + /* PSW to load when I/O interrupt happens */ > + lowcore->io_new_psw.mask = PSW_MASK_ZMODE; > + lowcore->io_new_psw.addr = (uint64_t)&&IOIntWakeup; /* Wake-up address */ > + > + /* Enable io interrupts subclass mask */ > + asm volatile("stctg 6,6,%0" : "=S" (ctl6) : : "memory"); For some reason the sles12 gcc does not like this. Can you use the "Q" constraint everywhere instead of "S".
On Thu, 5 Jul 2018 13:25:38 -0400 "Jason J. Herne" <jjherne@linux.ibm.com> wrote: > From: "Jason J. Herne" <jjherne@linux.vnet.ibm.com> > > Add struct for format-0 ccws. Support executing format-0 channel > programs and waiting for their completion before continuing execution. > This will be used for real dasd ipl. > > Add cu_type() to channel io library. This will be used to query control > unit type which is used to determine if we are booting a virtio device or a > real dasd device. > > Signed-off-by: Jason J. Herne <jjherne@linux.vnet.ibm.com> > Signed-off-by: Jason J. Herne <jjherne@linux.ibm.com> > --- > pc-bios/s390-ccw/cio.c | 127 +++++++++++++++++++++++++++++++++++++++++++++++++ > pc-bios/s390-ccw/cio.h | 25 +++++++++- > 2 files changed, 151 insertions(+), 1 deletion(-) > > diff --git a/pc-bios/s390-ccw/cio.c b/pc-bios/s390-ccw/cio.c > index 095f79b..f440380 100644 > --- a/pc-bios/s390-ccw/cio.c > +++ b/pc-bios/s390-ccw/cio.c > @@ -10,6 +10,7 @@ > > #include "libc.h" > #include "s390-ccw.h" > +#include "s390-arch.h" > #include "cio.h" > > static char chsc_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); > @@ -39,3 +40,129 @@ void enable_subchannel(SubChannelId schid) > schib.pmcw.ena = 1; > msch(schid, &schib); > } > + > +__u16 cu_type(SubChannelId schid) Make this return uint16_t? > +{ > + Ccw1 senseIdCcw; > + SenseId senseData; I'd prefer the variables to be sense_id_ccw and sense_data instead of CamelCasing. > + > + senseIdCcw.cmd_code = CCW_CMD_SENSE_ID; > + senseIdCcw.cda = ptr2u32(&senseData); Are we sure that this is always under 2G? > + senseIdCcw.count = sizeof(senseData); > + > + if (do_cio(schid, ptr2u32(&senseIdCcw), CCW_FMT1)) { > + panic("Failed to run SenseID CCw\n"); > + } > + > + return senseData.cu_type; > +} > + > +static bool irb_error(Irb *irb) > +{ > + /* We have to ignore Incorrect Length (cstat == 0x40) indicators because > + * real devices expect a 24 byte SenseID buffer, and virtio devices expect > + * a much larger buffer. Neither device type can tolerate a buffer size > + * different from what they expect so they set this indicator. Hm... do you have details? Is that basic vs. extended SenseID information? (If the code in QEMU is making incorrect assumptions, I'd like to fix that.) > + */ > + if (irb->scsw.cstat != 0x00 && irb->scsw.cstat != 0x40) { > + return true; > + } > + return irb->scsw.dstat != 0xc; > +} > + > +/* Executes a channel program at a given subchannel. The request to run the > + * channel program is sent to the subchannel, we then wait for the interrupt > + * singaling completion of the I/O operation(s) perfomed by the channel > + * program. Lastly we verify that the i/o operation completed without error and > + * that the interrupt we received was for the subchannel used to run the > + * channel program. Finally, real interrupts instead of polling in the s390-ccw bios, nice :) > + * > + * Note: This function assumes it is running in an environment where no other > + * cpus are generating or receiving I/O interrupts. So either run it in a > + * single-cpu environment or make sure all other cpus are not doing I/O and > + * have I/O interrupts masked off. > + */ > +int do_cio(SubChannelId schid, uint32_t ccw_addr, int fmt) > +{ > + Ccw0 *this_ccw, *prev_ccw; > + CmdOrb orb = {}; > + Irb irb = {}; > + int rc; > + > + IPL_assert(fmt == 0 || fmt == 1, "Invalid ccw format"); > + > + /* ccw_addr must be <= 24 bits and point to at least one whole ccw. */ > + if (fmt == 0) { > + IPL_assert(ccw_addr <= 0xFFFFFF - 8, "Invalid ccw address"); > + } > + > + orb.fmt = fmt ; > + orb.pfch = 1; /* QEMU's cio implementation requires prefetch */ > + orb.c64 = 1; /* QEMU's cio implementation requires 64-bit idaws */ > + orb.lpm = 0xFF; /* All paths allowed */ > + orb.cpa = ccw_addr; > + > + rc = ssch(schid, &orb); > + if (rc) { > + print_int("ssch failed with rc=", rc); > + return rc; Are you doing anything like retrying on cc 1/2? It's probably fine to give up on cc 3. > + } > + > + await_io_int(schid.sch_no); > + > + /* Clear read */ > + rc = tsch(schid, &irb); > + if (rc) { > + print_int("tsch failed with rc=", rc); > + return rc; If you get a cc 1 here (no status pending), that's probably an internal error (as you just did a successful ssch and assume you got an I/O interrupt). If you get a cc 3, it's probably a good idea to give up on this subchannel. > + } > + > + if (irb_error(&irb)) { > + this_ccw = u32toptr(irb.scsw.cpa); > + prev_ccw = u32toptr(irb.scsw.cpa - 8); > + > + print_int("irb_error: cstat=", irb.scsw.cstat); > + print_int(" dstat=", irb.scsw.dstat); > + print_int(" cpa=", irb.scsw.cpa); > + print_int(" prev_ccw=", *((uint64_t *)prev_ccw)); > + print_int(" this_ccw=", *((uint64_t *)this_ccw)); > + } > + > + return 0; > +}
On 07/06/2018 09:04 AM, Christian Borntraeger wrote: > > > On 07/05/2018 07:25 PM, Jason J. Herne wrote: >> From: "Jason J. Herne" <jjherne@linux.vnet.ibm.com> >> >> Add struct for format-0 ccws. Support executing format-0 channel >> programs and waiting for their completion before continuing execution. >> This will be used for real dasd ipl. >> >> Add cu_type() to channel io library. This will be used to query control >> unit type which is used to determine if we are booting a virtio device or a >> real dasd device. >> >> Signed-off-by: Jason J. Herne <jjherne@linux.vnet.ibm.com> >> Signed-off-by: Jason J. Herne <jjherne@linux.ibm.com> >> --- >> pc-bios/s390-ccw/cio.c | 127 +++++++++++++++++++++++++++++++++++++++++++++++++ >> pc-bios/s390-ccw/cio.h | 25 +++++++++- >> 2 files changed, 151 insertions(+), 1 deletion(-) >> >> diff --git a/pc-bios/s390-ccw/cio.c b/pc-bios/s390-ccw/cio.c >> index 095f79b..f440380 100644 >> --- a/pc-bios/s390-ccw/cio.c >> +++ b/pc-bios/s390-ccw/cio.c >> @@ -10,6 +10,7 @@ >> >> #include "libc.h" >> #include "s390-ccw.h" >> +#include "s390-arch.h" >> #include "cio.h" >> >> static char chsc_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); >> @@ -39,3 +40,129 @@ void enable_subchannel(SubChannelId schid) >> schib.pmcw.ena = 1; >> msch(schid, &schib); >> } >> + >> +__u16 cu_type(SubChannelId schid) >> +{ >> + Ccw1 senseIdCcw; >> + SenseId senseData; >> + >> + senseIdCcw.cmd_code = CCW_CMD_SENSE_ID; >> + senseIdCcw.cda = ptr2u32(&senseData); >> + senseIdCcw.count = sizeof(senseData); >> + >> + if (do_cio(schid, ptr2u32(&senseIdCcw), CCW_FMT1)) { >> + panic("Failed to run SenseID CCw\n"); >> + } >> + >> + return senseData.cu_type; >> +} >> + >> +static bool irb_error(Irb *irb) >> +{ >> + /* We have to ignore Incorrect Length (cstat == 0x40) indicators because >> + * real devices expect a 24 byte SenseID buffer, and virtio devices expect >> + * a much larger buffer. Neither device type can tolerate a buffer size >> + * different from what they expect so they set this indicator. >> + */ >> + if (irb->scsw.cstat != 0x00 && irb->scsw.cstat != 0x40) { >> + return true; >> + } >> + return irb->scsw.dstat != 0xc; >> +} >> + >> +/* Executes a channel program at a given subchannel. The request to run the >> + * channel program is sent to the subchannel, we then wait for the interrupt >> + * singaling completion of the I/O operation(s) perfomed by the channel >> + * program. Lastly we verify that the i/o operation completed without error and >> + * that the interrupt we received was for the subchannel used to run the >> + * channel program. >> + * >> + * Note: This function assumes it is running in an environment where no other >> + * cpus are generating or receiving I/O interrupts. So either run it in a >> + * single-cpu environment or make sure all other cpus are not doing I/O and >> + * have I/O interrupts masked off. >> + */ >> +int do_cio(SubChannelId schid, uint32_t ccw_addr, int fmt) >> +{ >> + Ccw0 *this_ccw, *prev_ccw; >> + CmdOrb orb = {}; >> + Irb irb = {}; >> + int rc; >> + >> + IPL_assert(fmt == 0 || fmt == 1, "Invalid ccw format"); >> + >> + /* ccw_addr must be <= 24 bits and point to at least one whole ccw. */ >> + if (fmt == 0) { >> + IPL_assert(ccw_addr <= 0xFFFFFF - 8, "Invalid ccw address"); >> + } >> + >> + orb.fmt = fmt ; >> + orb.pfch = 1; /* QEMU's cio implementation requires prefetch */ >> + orb.c64 = 1; /* QEMU's cio implementation requires 64-bit idaws */ >> + orb.lpm = 0xFF; /* All paths allowed */ >> + orb.cpa = ccw_addr; >> + >> + rc = ssch(schid, &orb); >> + if (rc) { >> + print_int("ssch failed with rc=", rc); >> + return rc; >> + } >> + >> + await_io_int(schid.sch_no); >> + >> + /* Clear read */ >> + rc = tsch(schid, &irb); >> + if (rc) { >> + print_int("tsch failed with rc=", rc); >> + return rc; >> + } >> + >> + if (irb_error(&irb)) { >> + this_ccw = u32toptr(irb.scsw.cpa); >> + prev_ccw = u32toptr(irb.scsw.cpa - 8); >> + >> + print_int("irb_error: cstat=", irb.scsw.cstat); >> + print_int(" dstat=", irb.scsw.dstat); >> + print_int(" cpa=", irb.scsw.cpa); >> + print_int(" prev_ccw=", *((uint64_t *)prev_ccw)); >> + print_int(" this_ccw=", *((uint64_t *)this_ccw)); >> + } >> + >> + return 0; >> +} >> + >> +void await_io_int(uint16_t sch_no) >> +{ >> + /* >> + * wait_psw and ctl6 must be static to avoid stack allocation as gcc cannot >> + * align stack variables. The stctg, lctlg and lpswe instructions require >> + * that their operands be aligned on an 8-byte boundary. >> + */ >> + static uint64_t ctl6 __attribute__((__aligned__(8))); >> + static PSW wait_psw; >> + >> + /* PSW to load when I/O interrupt happens */ >> + lowcore->io_new_psw.mask = PSW_MASK_ZMODE; >> + lowcore->io_new_psw.addr = (uint64_t)&&IOIntWakeup; /* Wake-up address */ >> + >> + /* Enable io interrupts subclass mask */ >> + asm volatile("stctg 6,6,%0" : "=S" (ctl6) : : "memory"); > > For some reason the sles12 gcc does not like this. Can you use the "Q" constraint > everywhere instead of "S". > Or better use "QS" and "=QS"
On 07/06/2018 10:03 AM, Cornelia Huck wrote: > On Thu, 5 Jul 2018 13:25:38 -0400 > "Jason J. Herne" <jjherne@linux.ibm.com> wrote: > >> From: "Jason J. Herne" <jjherne@linux.vnet.ibm.com> >> >> Add struct for format-0 ccws. Support executing format-0 channel > >> + */ >> + if (irb->scsw.cstat != 0x00 && irb->scsw.cstat != 0x40) { >> + return true; >> + } >> + return irb->scsw.dstat != 0xc; >> +} >> + >> +/* Executes a channel program at a given subchannel. The request to run the >> + * channel program is sent to the subchannel, we then wait for the interrupt >> + * singaling completion of the I/O operation(s) perfomed by the channel >> + * program. Lastly we verify that the i/o operation completed without error and >> + * that the interrupt we received was for the subchannel used to run the >> + * channel program. > > Finally, real interrupts instead of polling in the s390-ccw bios, > nice :) > >> + * >> + * Note: This function assumes it is running in an environment where no other >> + * cpus are generating or receiving I/O interrupts. So either run it in a >> + * single-cpu environment or make sure all other cpus are not doing I/O and >> + * have I/O interrupts masked off. >> + */ >> +int do_cio(SubChannelId schid, uint32_t ccw_addr, int fmt) >> +{ >> + Ccw0 *this_ccw, *prev_ccw; >> + CmdOrb orb = {}; >> + Irb irb = {}; >> + int rc; >> + >> + IPL_assert(fmt == 0 || fmt == 1, "Invalid ccw format"); >> + >> + /* ccw_addr must be <= 24 bits and point to at least one whole ccw. */ >> + if (fmt == 0) { >> + IPL_assert(ccw_addr <= 0xFFFFFF - 8, "Invalid ccw address"); >> + } >> + >> + orb.fmt = fmt ; >> + orb.pfch = 1; /* QEMU's cio implementation requires prefetch */ >> + orb.c64 = 1; /* QEMU's cio implementation requires 64-bit idaws */ >> + orb.lpm = 0xFF; /* All paths allowed */ >> + orb.cpa = ccw_addr; >> + >> + rc = ssch(schid, &orb); >> + if (rc) { >> + print_int("ssch failed with rc=", rc); >> + return rc; > > Are you doing anything like retrying on cc 1/2? It's probably fine to > give up on cc 3. > We are in IPL context. We should have exclusive access to the DASD we are IPL-ing from, or not? My understanding was that if the layers below us don't violate the rules, we should never observe cc 1 or 2 here. Or am I wrong? Regards, Halil
On Fri, 6 Jul 2018 13:42:25 +0200 Halil Pasic <pasic@linux.ibm.com> wrote: > On 07/06/2018 10:03 AM, Cornelia Huck wrote: > > On Thu, 5 Jul 2018 13:25:38 -0400 > > "Jason J. Herne" <jjherne@linux.ibm.com> wrote: > >> + rc = ssch(schid, &orb); > >> + if (rc) { > >> + print_int("ssch failed with rc=", rc); > >> + return rc; > > > > Are you doing anything like retrying on cc 1/2? It's probably fine to > > give up on cc 3. > > > > We are in IPL context. We should have exclusive access to the DASD > we are IPL-ing from, or not? My understanding was that if the layers > below us don't violate the rules, we should never observe cc 1 or 2 > here. Or am I wrong? cc 2 probably not (nobody should be doing a halt or a clear), but cc 1 if there's some unsolicited status pending?
On 07/06/2018 02:26 PM, Cornelia Huck wrote: > On Fri, 6 Jul 2018 13:42:25 +0200 > Halil Pasic <pasic@linux.ibm.com> wrote: > >> On 07/06/2018 10:03 AM, Cornelia Huck wrote: >>> On Thu, 5 Jul 2018 13:25:38 -0400 >>> "Jason J. Herne" <jjherne@linux.ibm.com> wrote: >>>> + rc = ssch(schid, &orb); >>>> + if (rc) { >>>> + print_int("ssch failed with rc=", rc); >>>> + return rc; >>> >>> Are you doing anything like retrying on cc 1/2? It's probably fine to >>> give up on cc 3. >>> >> >> We are in IPL context. We should have exclusive access to the DASD >> we are IPL-ing from, or not? My understanding was that if the layers >> below us don't violate the rules, we should never observe cc 1 or 2 >> here. Or am I wrong? > > cc 2 probably not (nobody should be doing a halt or a clear), but cc 1 > if there's some unsolicited status pending? > AFAIK we are supposed to give up then. The status was supposed to be cleared away on the reset which precedes IPL. If the device presented an unsolicited status between reset and starting the IPL IO on the channel I think the IPL should fail. Regards, Halil
On Fri, 6 Jul 2018 15:03:49 +0200 Halil Pasic <pasic@linux.ibm.com> wrote: > On 07/06/2018 02:26 PM, Cornelia Huck wrote: > > On Fri, 6 Jul 2018 13:42:25 +0200 > > Halil Pasic <pasic@linux.ibm.com> wrote: > > > >> On 07/06/2018 10:03 AM, Cornelia Huck wrote: > >>> On Thu, 5 Jul 2018 13:25:38 -0400 > >>> "Jason J. Herne" <jjherne@linux.ibm.com> wrote: > >>>> + rc = ssch(schid, &orb); > >>>> + if (rc) { > >>>> + print_int("ssch failed with rc=", rc); One nit: make this 'cc' instead of 'rc'? > >>>> + return rc; > >>> > >>> Are you doing anything like retrying on cc 1/2? It's probably fine to > >>> give up on cc 3. > >>> > >> > >> We are in IPL context. We should have exclusive access to the DASD > >> we are IPL-ing from, or not? My understanding was that if the layers > >> below us don't violate the rules, we should never observe cc 1 or 2 > >> here. Or am I wrong? > > > > cc 2 probably not (nobody should be doing a halt or a clear), but cc 1 > > if there's some unsolicited status pending? > > > > AFAIK we are supposed to give up then. The status was supposed to be > cleared away on the reset which precedes IPL. If the device presented > an unsolicited status between reset and starting the IPL IO on the > channel I think the IPL should fail. I very dimly remember efforts to make (non-QEMU) IPL more robust, but I think that was more about IFCCs and the like. Anyway, it is probably not worth spending too much time on. Being able to IPL from vfio-ccw handled dasd is already very nice :)
On 07/06/2018 03:15 PM, Cornelia Huck wrote: > On Fri, 6 Jul 2018 15:03:49 +0200 > Halil Pasic <pasic@linux.ibm.com> wrote: > >> On 07/06/2018 02:26 PM, Cornelia Huck wrote: >>> On Fri, 6 Jul 2018 13:42:25 +0200 >>> Halil Pasic <pasic@linux.ibm.com> wrote: >>> >>>> On 07/06/2018 10:03 AM, Cornelia Huck wrote: >>>>> On Thu, 5 Jul 2018 13:25:38 -0400 >>>>> "Jason J. Herne" <jjherne@linux.ibm.com> wrote: >>>>>> + rc = ssch(schid, &orb); >>>>>> + if (rc) { >>>>>> + print_int("ssch failed with rc=", rc); > > One nit: make this 'cc' instead of 'rc'? > >>>>>> + return rc; >>>>> >>>>> Are you doing anything like retrying on cc 1/2? It's probably fine to >>>>> give up on cc 3. >>>>> >>>> >>>> We are in IPL context. We should have exclusive access to the DASD >>>> we are IPL-ing from, or not? My understanding was that if the layers >>>> below us don't violate the rules, we should never observe cc 1 or 2 >>>> here. Or am I wrong? >>> >>> cc 2 probably not (nobody should be doing a halt or a clear), but cc 1 >>> if there's some unsolicited status pending? >>> >> >> AFAIK we are supposed to give up then. The status was supposed to be >> cleared away on the reset which precedes IPL. If the device presented >> an unsolicited status between reset and starting the IPL IO on the >> channel I think the IPL should fail. > > I very dimly remember efforts to make (non-QEMU) IPL more robust, but I > think that was more about IFCCs and the like. > > Anyway, it is probably not worth spending too much time on. Being able > to IPL from vfio-ccw handled dasd is already very nice :) > PoP does not say much, and the internal documentation is, well internal. Maybe we could do more on IFCC but we don't have to. One other thing where we could do more is diagnostic info. But I really think this is good enough for the first shot. Regards, Halil
On 07/06/2018 04:03 AM, Cornelia Huck wrote: > On Thu, 5 Jul 2018 13:25:38 -0400 > "Jason J. Herne" <jjherne@linux.ibm.com> wrote: > ... >> + >> + senseIdCcw.cmd_code = CCW_CMD_SENSE_ID; >> + senseIdCcw.cda = ptr2u32(&senseData); > > Are we sure that this is always under 2G? > I thought I saw somewhere that Qemu always loads the bios at the high end of guest memory or right under the 2 GB line if guest memory is greater than 2 GB... I cannot remember the source of this information but I do use it when calculating where to find the bios code for debugging with GDB. Here is the formula used: let "FW_BASE=RAM_SIZE > 0x80000000 ? 0x80000000 : RAM_SIZE" let "FW_BASE=(FW_BASE - 0x200000) & (~0xffff)" So 2GB - 2MB is where we seem to load the bios. Can anyone provide more concrete info? FWIW I've tried booting a 6 GB guest and it works :). >> + senseIdCcw.count = sizeof(senseData); >> + >> + if (do_cio(schid, ptr2u32(&senseIdCcw), CCW_FMT1)) { >> + panic("Failed to run SenseID CCw\n"); >> + } >> + >> + return senseData.cu_type; >> +} >> + >> +static bool irb_error(Irb *irb) >> +{ >> + /* We have to ignore Incorrect Length (cstat == 0x40) indicators because >> + * real devices expect a 24 byte SenseID buffer, and virtio devices expect >> + * a much larger buffer. Neither device type can tolerate a buffer size >> + * different from what they expect so they set this indicator. > > Hm... do you have details? Is that basic vs. extended SenseID > information? > > (If the code in QEMU is making incorrect assumptions, I'd like to fix > that.) I really have no idea and was hoping someone who knows virtio ccw better than myself would have some ideas. This comment simply documents what I discovered in testing. I can look into it more if no one else has any information on this. FWIW, it has been working without issue in my testing. >> + */ >> + if (irb->scsw.cstat != 0x00 && irb->scsw.cstat != 0x40) { >> + return true; >> + } >> + return irb->scsw.dstat != 0xc; >> +} >> + >> +/* Executes a channel program at a given subchannel. The request to run the >> + * channel program is sent to the subchannel, we then wait for the interrupt >> + * singaling completion of the I/O operation(s) perfomed by the channel >> + * program. Lastly we verify that the i/o operation completed without error and >> + * that the interrupt we received was for the subchannel used to run the >> + * channel program. > > Finally, real interrupts instead of polling in the s390-ccw bios, > nice :) > Yep, I was happy to replace the polling. This probably needs wider testing however as it is tricky enough to hide some really weird side effects as I discovered a few times during development. >> + * >> + * Note: This function assumes it is running in an environment where no other >> + * cpus are generating or receiving I/O interrupts. So either run it in a >> + * single-cpu environment or make sure all other cpus are not doing I/O and >> + * have I/O interrupts masked off. >> + */ >> +int do_cio(SubChannelId schid, uint32_t ccw_addr, int fmt) >> +{ >> + Ccw0 *this_ccw, *prev_ccw; >> + CmdOrb orb = {}; >> + Irb irb = {}; >> + int rc; >> + >> + IPL_assert(fmt == 0 || fmt == 1, "Invalid ccw format"); >> + >> + /* ccw_addr must be <= 24 bits and point to at least one whole ccw. */ >> + if (fmt == 0) { >> + IPL_assert(ccw_addr <= 0xFFFFFF - 8, "Invalid ccw address"); >> + } >> + >> + orb.fmt = fmt ; >> + orb.pfch = 1; /* QEMU's cio implementation requires prefetch */ >> + orb.c64 = 1; /* QEMU's cio implementation requires 64-bit idaws */ >> + orb.lpm = 0xFF; /* All paths allowed */ >> + orb.cpa = ccw_addr; >> + >> + rc = ssch(schid, &orb); >> + if (rc) { >> + print_int("ssch failed with rc=", rc); >> + return rc; > > Are you doing anything like retrying on cc 1/2? It's probably fine to > give up on cc 3. > >> + } >> + >> + await_io_int(schid.sch_no); >> + >> + /* Clear read */ >> + rc = tsch(schid, &irb); >> + if (rc) { >> + print_int("tsch failed with rc=", rc); >> + return rc; > > If you get a cc 1 here (no status pending), that's probably an internal > error (as you just did a successful ssch and assume you got an I/O > interrupt). If you get a cc 3, it's probably a good idea to give up on > this subchannel. At the moment the code treats any non-zero CC as an error and returns it to the caller.
On 07/06/2018 09:33 AM, Halil Pasic wrote: > > > On 07/06/2018 03:15 PM, Cornelia Huck wrote: >> On Fri, 6 Jul 2018 15:03:49 +0200 >> Halil Pasic <pasic@linux.ibm.com> wrote: >> >>> On 07/06/2018 02:26 PM, Cornelia Huck wrote: >>>> On Fri, 6 Jul 2018 13:42:25 +0200 >>>> Halil Pasic <pasic@linux.ibm.com> wrote: >>>>> On 07/06/2018 10:03 AM, Cornelia Huck wrote: >>>>>> On Thu, 5 Jul 2018 13:25:38 -0400 >>>>>> "Jason J. Herne" <jjherne@linux.ibm.com> wrote: >>>>>>> + rc = ssch(schid, &orb); >>>>>>> + if (rc) { >>>>>>> + print_int("ssch failed with rc=", rc); >> >> One nit: make this 'cc' instead of 'rc'? >> >>>>>>> + return rc; >>>>>> >>>>>> Are you doing anything like retrying on cc 1/2? It's probably fine to >>>>>> give up on cc 3. >>>>> >>>>> We are in IPL context. We should have exclusive access to the DASD >>>>> we are IPL-ing from, or not? My understanding was that if the layers >>>>> below us don't violate the rules, we should never observe cc 1 or 2 >>>>> here. Or am I wrong? >>>> >>>> cc 2 probably not (nobody should be doing a halt or a clear), but cc 1 >>>> if there's some unsolicited status pending? >>> >>> AFAIK we are supposed to give up then. The status was supposed to be >>> cleared away on the reset which precedes IPL. If the device presented >>> an unsolicited status between reset and starting the IPL IO on the >>> channel I think the IPL should fail. >> >> I very dimly remember efforts to make (non-QEMU) IPL more robust, but I >> think that was more about IFCCs and the like. >> >> Anyway, it is probably not worth spending too much time on. Being able >> to IPL from vfio-ccw handled dasd is already very nice :) >> > > PoP does not say much, and the internal documentation is, well internal. > Maybe we could do more on IFCC but we don't have to. One other thing > where we could do more is diagnostic info. But I really think this is > good enough for the first shot. > My thought was similar to what Halil says. We just cleared any unsolicited status. If that condition persists or we hit some crazy timing window where device characteristics have changed then we likely should re-drive the whole process anyway. Also, given that this is a vfio-ccw device I suspect we are far enough isolated from the "real" device that the host kernel will handle those unsolicited interrupts and we won't ever see them. Can anyone confirm or deny that? For IPL, I kind of feel this is not worth worrying about because the window is very small and it is only a very temporary problem for IPL (a one-shot operation). Feel free to disagree :)
On 07/05/2018 07:25 PM, Jason J. Herne wrote: > From: "Jason J. Herne" <jjherne@linux.vnet.ibm.com> > +void await_io_int(uint16_t sch_no) > +{ > + /* > + * wait_psw and ctl6 must be static to avoid stack allocation as gcc cannot > + * align stack variables. The stctg, lctlg and lpswe instructions require > + * that their operands be aligned on an 8-byte boundary. > + */ In fact, the ABI guarantees that the stack is always 8 byte aligned and gcc should align any uint64_t to 8 byte as well on the stack. > + static uint64_t ctl6 __attribute__((__aligned__(8))); > + static PSW wait_psw; > +
On 07/05/2018 07:25 PM, Jason J. Herne wrote: Instead of doing this > +void await_io_int(uint16_t sch_no) > +{ > + /* > + * wait_psw and ctl6 must be static to avoid stack allocation as gcc cannot > + * align stack variables. The stctg, lctlg and lpswe instructions require > + * that their operands be aligned on an 8-byte boundary. > + */ > + static uint64_t ctl6 __attribute__((__aligned__(8))); > + static PSW wait_psw; > + > + /* PSW to load when I/O interrupt happens */ > + lowcore->io_new_psw.mask = PSW_MASK_ZMODE; > + lowcore->io_new_psw.addr = (uint64_t)&&IOIntWakeup; /* Wake-up address */ > + > + /* Enable io interrupts subclass mask */ > + asm volatile("stctg 6,6,%0" : "=S" (ctl6) : : "memory"); > + ctl6 |= 0x00000000FF000000; > + asm volatile("lctlg 6,6,%0" : : "S" (ctl6)); > + > + /* Set wait psw enabled for io interrupt */ > + wait_psw.mask = (PSW_MASK_ZMODE | PSW_MASK_IOINT | PSW_MASK_WAIT); > + asm volatile("lpswe %0" : : "Q" (wait_psw) : "cc"); > + > + panic("await_io_int: lpswe failed!!\n"); > + > +IOIntWakeup: > + /* Should never happen - all other subchannels are disabled by default */ > + IPL_assert(lowcore->subchannel_nr == sch_no, > + "Interrupt from unexpected device"); > + > + /* Disable all subclasses of I/O interrupts for this cpu */ > + asm volatile("stctg 6,6,%0" : "=S" (ctl6) : : "memory"); > + ctl6 &= ~(0x00000000FF000000); > + asm volatile("lctlg 6,6,%0" : : "S" (ctl6)); > +} Can you please add something like consume_sclp_interrupt in start.S? Jumping inside a function from an interrupt can mess up the stack and possibly other things as gcc does not expect this. (you do a longjmp without doing any precaution) This might explain why you needed the static hack for the variables.
On Fri, 6 Jul 2018 10:35:06 -0400 "Jason J. Herne" <jjherne@linux.ibm.com> wrote: > On 07/06/2018 04:03 AM, Cornelia Huck wrote: > > On Thu, 5 Jul 2018 13:25:38 -0400 > > "Jason J. Herne" <jjherne@linux.ibm.com> wrote: > >> + senseIdCcw.count = sizeof(senseData); > >> + > >> + if (do_cio(schid, ptr2u32(&senseIdCcw), CCW_FMT1)) { > >> + panic("Failed to run SenseID CCw\n"); > >> + } > >> + > >> + return senseData.cu_type; > >> +} > >> + > >> +static bool irb_error(Irb *irb) > >> +{ > >> + /* We have to ignore Incorrect Length (cstat == 0x40) indicators because > >> + * real devices expect a 24 byte SenseID buffer, and virtio devices expect > >> + * a much larger buffer. Neither device type can tolerate a buffer size > >> + * different from what they expect so they set this indicator. > > > > Hm... do you have details? Is that basic vs. extended SenseID > > information? > > > > (If the code in QEMU is making incorrect assumptions, I'd like to fix > > that.) > > I really have no idea and was hoping someone who knows virtio ccw better > than myself would have some ideas. This comment simply documents what I > discovered in testing. I can look into it more if no one else has any > information on this. FWIW, it has been working without issue in my testing. OK, I've looked at this a bit more. It seems that we always get at least the basic information; the command, however, provides for a variable length (the CIWs). The Linux kernel always tries to get the maximum length (basic sense id information + maximum number possible number of CIWs), but sets SLI as it may get less of that. I'm not sure what I should return in QEMU for emulated devices, the documentation of SenseID I could find is a bit light on details (as the data can be of variable length). I think you should simply always use SLI, as you don't know the length of the SenseID data you can get beforehand. (BTW, is there a newer public version of SA22-7204-01 - Common I/O Device Commands - available? That one is a bit dated.)
diff --git a/pc-bios/s390-ccw/cio.c b/pc-bios/s390-ccw/cio.c index 095f79b..f440380 100644 --- a/pc-bios/s390-ccw/cio.c +++ b/pc-bios/s390-ccw/cio.c @@ -10,6 +10,7 @@ #include "libc.h" #include "s390-ccw.h" +#include "s390-arch.h" #include "cio.h" static char chsc_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); @@ -39,3 +40,129 @@ void enable_subchannel(SubChannelId schid) schib.pmcw.ena = 1; msch(schid, &schib); } + +__u16 cu_type(SubChannelId schid) +{ + Ccw1 senseIdCcw; + SenseId senseData; + + senseIdCcw.cmd_code = CCW_CMD_SENSE_ID; + senseIdCcw.cda = ptr2u32(&senseData); + senseIdCcw.count = sizeof(senseData); + + if (do_cio(schid, ptr2u32(&senseIdCcw), CCW_FMT1)) { + panic("Failed to run SenseID CCw\n"); + } + + return senseData.cu_type; +} + +static bool irb_error(Irb *irb) +{ + /* We have to ignore Incorrect Length (cstat == 0x40) indicators because + * real devices expect a 24 byte SenseID buffer, and virtio devices expect + * a much larger buffer. Neither device type can tolerate a buffer size + * different from what they expect so they set this indicator. + */ + if (irb->scsw.cstat != 0x00 && irb->scsw.cstat != 0x40) { + return true; + } + return irb->scsw.dstat != 0xc; +} + +/* Executes a channel program at a given subchannel. The request to run the + * channel program is sent to the subchannel, we then wait for the interrupt + * singaling completion of the I/O operation(s) perfomed by the channel + * program. Lastly we verify that the i/o operation completed without error and + * that the interrupt we received was for the subchannel used to run the + * channel program. + * + * Note: This function assumes it is running in an environment where no other + * cpus are generating or receiving I/O interrupts. So either run it in a + * single-cpu environment or make sure all other cpus are not doing I/O and + * have I/O interrupts masked off. + */ +int do_cio(SubChannelId schid, uint32_t ccw_addr, int fmt) +{ + Ccw0 *this_ccw, *prev_ccw; + CmdOrb orb = {}; + Irb irb = {}; + int rc; + + IPL_assert(fmt == 0 || fmt == 1, "Invalid ccw format"); + + /* ccw_addr must be <= 24 bits and point to at least one whole ccw. */ + if (fmt == 0) { + IPL_assert(ccw_addr <= 0xFFFFFF - 8, "Invalid ccw address"); + } + + orb.fmt = fmt ; + orb.pfch = 1; /* QEMU's cio implementation requires prefetch */ + orb.c64 = 1; /* QEMU's cio implementation requires 64-bit idaws */ + orb.lpm = 0xFF; /* All paths allowed */ + orb.cpa = ccw_addr; + + rc = ssch(schid, &orb); + if (rc) { + print_int("ssch failed with rc=", rc); + return rc; + } + + await_io_int(schid.sch_no); + + /* Clear read */ + rc = tsch(schid, &irb); + if (rc) { + print_int("tsch failed with rc=", rc); + return rc; + } + + if (irb_error(&irb)) { + this_ccw = u32toptr(irb.scsw.cpa); + prev_ccw = u32toptr(irb.scsw.cpa - 8); + + print_int("irb_error: cstat=", irb.scsw.cstat); + print_int(" dstat=", irb.scsw.dstat); + print_int(" cpa=", irb.scsw.cpa); + print_int(" prev_ccw=", *((uint64_t *)prev_ccw)); + print_int(" this_ccw=", *((uint64_t *)this_ccw)); + } + + return 0; +} + +void await_io_int(uint16_t sch_no) +{ + /* + * wait_psw and ctl6 must be static to avoid stack allocation as gcc cannot + * align stack variables. The stctg, lctlg and lpswe instructions require + * that their operands be aligned on an 8-byte boundary. + */ + static uint64_t ctl6 __attribute__((__aligned__(8))); + static PSW wait_psw; + + /* PSW to load when I/O interrupt happens */ + lowcore->io_new_psw.mask = PSW_MASK_ZMODE; + lowcore->io_new_psw.addr = (uint64_t)&&IOIntWakeup; /* Wake-up address */ + + /* Enable io interrupts subclass mask */ + asm volatile("stctg 6,6,%0" : "=S" (ctl6) : : "memory"); + ctl6 |= 0x00000000FF000000; + asm volatile("lctlg 6,6,%0" : : "S" (ctl6)); + + /* Set wait psw enabled for io interrupt */ + wait_psw.mask = (PSW_MASK_ZMODE | PSW_MASK_IOINT | PSW_MASK_WAIT); + asm volatile("lpswe %0" : : "Q" (wait_psw) : "cc"); + + panic("await_io_int: lpswe failed!!\n"); + +IOIntWakeup: + /* Should never happen - all other subchannels are disabled by default */ + IPL_assert(lowcore->subchannel_nr == sch_no, + "Interrupt from unexpected device"); + + /* Disable all subclasses of I/O interrupts for this cpu */ + asm volatile("stctg 6,6,%0" : "=S" (ctl6) : : "memory"); + ctl6 &= ~(0x00000000FF000000); + asm volatile("lctlg 6,6,%0" : : "S" (ctl6)); +} diff --git a/pc-bios/s390-ccw/cio.h b/pc-bios/s390-ccw/cio.h index 7b07d75..d8e2955 100644 --- a/pc-bios/s390-ccw/cio.h +++ b/pc-bios/s390-ccw/cio.h @@ -127,7 +127,23 @@ struct tpi_info { __u32 reserved4 : 12; } __attribute__ ((packed, aligned(4))); -/* channel command word (type 1) */ +/* channel command word (format 0) */ +typedef struct ccw0 { + __u8 cmd_code; + __u32 cda : 24; + __u32 chainData : 1; + __u32 chain : 1; + __u32 sli : 1; + __u32 skip : 1; + __u32 pci : 1; + __u32 ida : 1; + __u32 suspend : 1; + __u32 mida : 1; + __u8 reserved; + __u16 count; +} __attribute__ ((packed, aligned(8))) Ccw0; + +/* channel command word (format 1) */ typedef struct ccw1 { __u8 cmd_code; __u8 flags; @@ -135,6 +151,10 @@ typedef struct ccw1 { __u32 cda; } __attribute__ ((packed, aligned(8))) Ccw1; +/* do_cio() CCW formats */ +#define CCW_FMT0 0x00 +#define CCW_FMT1 0x01 + #define CCW_FLAG_DC 0x80 #define CCW_FLAG_CC 0x40 #define CCW_FLAG_SLI 0x20 @@ -215,6 +235,9 @@ typedef struct irb { int enable_mss_facility(void); void enable_subchannel(SubChannelId schid); +__u16 cu_type(SubChannelId schid); +void await_io_int(uint16_t sch_no); +int do_cio(SubChannelId schid, uint32_t ccw_addr, int fmt); /* * Some S390 specific IO instructions as inline