Message ID | 20220109205640.4126817-1-philipp.tomsich@vrull.eu |
---|---|
State | New |
Headers | show |
Series | [v1,1/2] decodetree: Add an optional predicate-function for decoding | expand |
Hi Philipp, On 1/9/22 21:56, Philipp Tomsich wrote: > This adds the possibility to specify a predicate-function that is > called as part of decoding in multi-patterns; it is intended for > use-cases (such as vendor-defined instructions in RISC-V) where the > same bitpattern may decode into different functions depending on the > overall configuration of the emulation target. But for a particular CPU, its "vendor ISAs" won't change at runtime. Since we know this at build time, I don't understand why you need predicate support at all. > > At this time, we only support predicates for multi-patterns. > > Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu> > > --- > > docs/devel/decodetree.rst | 7 ++++++- > scripts/decodetree.py | 24 +++++++++++++++++++++--- > 2 files changed, 27 insertions(+), 4 deletions(-) > > diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst > index 49ea50c2a7..241aaec8bb 100644 > --- a/docs/devel/decodetree.rst > +++ b/docs/devel/decodetree.rst > @@ -144,9 +144,10 @@ Patterns > Syntax:: > > pat_def := identifier ( pat_elt )+ > - pat_elt := fixedbit_elt | field_elt | field_ref | args_ref | fmt_ref | const_elt > + pat_elt := fixedbit_elt | field_elt | field_ref | args_ref | fmt_ref | const_elt | predicate > fmt_ref := '@' identifier > const_elt := identifier '=' number > + predicate := '|' identifier > > The *fixedbit_elt* and *field_elt* specifiers are unchanged from formats. > A pattern that does not specify a named format will have one inferred > @@ -156,6 +157,10 @@ A *const_elt* allows a argument to be set to a constant value. This may > come in handy when fields overlap between patterns and one has to > include the values in the *fixedbit_elt* instead. > > +A *predicate* allows to specify a predicate function (returing true or > +false) to determine the applicability of the pattern. Currently, this > +will change the decode-behaviour for overlapping multi-patterns only. > + > The decoder will call a translator function for each pattern matched. > > Pattern examples:: > diff --git a/scripts/decodetree.py b/scripts/decodetree.py > index a03dc6b5e3..7da2282411 100644 > --- a/scripts/decodetree.py > +++ b/scripts/decodetree.py > @@ -52,6 +52,7 @@ > re_fld_ident = '%[a-zA-Z0-9_]*' > re_fmt_ident = '@[a-zA-Z0-9_]*' > re_pat_ident = '[a-zA-Z0-9_]*' > +re_predicate_ident = '\|[a-zA-Z_][a-zA-Z0-9_]*' > > def error_with_file(file, lineno, *args): > """Print an error message from file:line and args and exit.""" > @@ -119,6 +120,14 @@ def whexC(val): > suffix = 'u' > return whex(val) + suffix > > +def predicate(val): > + """Return a string for calling a predicate function > + (if specified, accepting 'None' as an indication > + that no predicate is to be emitted) with the ctx > + as a parameter.""" > + if (val == None): > + return '' > + return ' && ' + val + '(ctx)' > > def str_match_bits(bits, mask): > """Return a string pretty-printing BITS/MASK""" > @@ -340,7 +349,7 @@ def output_def(self): > > class General: > """Common code between instruction formats and instruction patterns""" > - def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): > + def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w, p = None): > self.name = name > self.file = input_file > self.lineno = lineno > @@ -351,6 +360,7 @@ def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): > self.fieldmask = fldm > self.fields = flds > self.width = w > + self.predicate = p > > def __str__(self): > return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask) > @@ -499,7 +509,7 @@ def output_code(self, i, extracted, outerbits, outermask): > if outermask != p.fixedmask: > innermask = p.fixedmask & ~outermask > innerbits = p.fixedbits & ~outermask > - output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n') > + output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}{predicate(p.predicate)}) {{\n') > output(ind, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n') > p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask) > output(ind, '}\n') > @@ -826,6 +836,7 @@ def parse_generic(lineno, parent_pat, name, toks): > global re_fld_ident > global re_fmt_ident > global re_C_ident > + global re_predicate_ident > global insnwidth > global insnmask > global variablewidth > @@ -839,6 +850,7 @@ def parse_generic(lineno, parent_pat, name, toks): > flds = {} > arg = None > fmt = None > + predicate = None > for t in toks: > # '&Foo' gives a format an explicit argument set. > if re.fullmatch(re_arg_ident, t): > @@ -881,6 +893,12 @@ def parse_generic(lineno, parent_pat, name, toks): > flds = add_field(lineno, flds, fname, ConstField(value)) > continue > > + # '|predicate' sets a predicate function to be called. > + if re.fullmatch(re_predicate_ident, t): > + tt = t[1:] > + predicate = tt; > + continue > + > # Pattern of 0s, 1s, dots and dashes indicate required zeros, > # required ones, or dont-cares. > if re.fullmatch('[01.-]+', t): > @@ -979,7 +997,7 @@ def parse_generic(lineno, parent_pat, name, toks): > if f not in flds.keys() and f not in fmt.fields.keys(): > error(lineno, f'field {f} not initialized') > pat = Pattern(name, lineno, fmt, fixedbits, fixedmask, > - undefmask, fieldmask, flds, width) > + undefmask, fieldmask, flds, width, predicate) > parent_pat.pats.append(pat) > allpatterns.append(pat) >
For RISC-V the opcode decode will change between different vendor implementations of RISC-V (emulated by the same qemu binary). Any two vendors may reuse the same opcode space, e.g., we may end up with: # *** RV64 Custom-3 Extension *** { vt_maskc 0000000 ..... ..... 110 ..... 1111011 @r |has_xventanacondops_p vt_maskcn 0000000 ..... ..... 111 ..... 1111011 @r |has_xventanacondops_p someone_something ............ ..... 000 ..... 1100111 @i |has_xsomeonesomething_p } With extensions being enabled either from the commandline -cpu any,xventanacondops=true or possibly even having a AMP in one emulation setup (e.g. application cores having one extension and power-mangement cores having a different one — or even a conflicting one). Cheers, Philipp. On Mon, 10 Jan 2022 at 10:43, Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: > > Hi Philipp, > > On 1/9/22 21:56, Philipp Tomsich wrote: > > This adds the possibility to specify a predicate-function that is > > called as part of decoding in multi-patterns; it is intended for > > use-cases (such as vendor-defined instructions in RISC-V) where the > > same bitpattern may decode into different functions depending on the > > overall configuration of the emulation target. > > But for a particular CPU, its "vendor ISAs" won't change at runtime. > > Since we know this at build time, I don't understand why you need > predicate support at all. > > > > > At this time, we only support predicates for multi-patterns. > > > > Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu> > > > > --- > > > > docs/devel/decodetree.rst | 7 ++++++- > > scripts/decodetree.py | 24 +++++++++++++++++++++--- > > 2 files changed, 27 insertions(+), 4 deletions(-) > > > > diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst > > index 49ea50c2a7..241aaec8bb 100644 > > --- a/docs/devel/decodetree.rst > > +++ b/docs/devel/decodetree.rst > > @@ -144,9 +144,10 @@ Patterns > > Syntax:: > > > > pat_def := identifier ( pat_elt )+ > > - pat_elt := fixedbit_elt | field_elt | field_ref | args_ref | fmt_ref | const_elt > > + pat_elt := fixedbit_elt | field_elt | field_ref | args_ref | fmt_ref | const_elt | predicate > > fmt_ref := '@' identifier > > const_elt := identifier '=' number > > + predicate := '|' identifier > > > > The *fixedbit_elt* and *field_elt* specifiers are unchanged from formats. > > A pattern that does not specify a named format will have one inferred > > @@ -156,6 +157,10 @@ A *const_elt* allows a argument to be set to a constant value. This may > > come in handy when fields overlap between patterns and one has to > > include the values in the *fixedbit_elt* instead. > > > > +A *predicate* allows to specify a predicate function (returing true or > > +false) to determine the applicability of the pattern. Currently, this > > +will change the decode-behaviour for overlapping multi-patterns only. > > + > > The decoder will call a translator function for each pattern matched. > > > > Pattern examples:: > > diff --git a/scripts/decodetree.py b/scripts/decodetree.py > > index a03dc6b5e3..7da2282411 100644 > > --- a/scripts/decodetree.py > > +++ b/scripts/decodetree.py > > @@ -52,6 +52,7 @@ > > re_fld_ident = '%[a-zA-Z0-9_]*' > > re_fmt_ident = '@[a-zA-Z0-9_]*' > > re_pat_ident = '[a-zA-Z0-9_]*' > > +re_predicate_ident = '\|[a-zA-Z_][a-zA-Z0-9_]*' > > > > def error_with_file(file, lineno, *args): > > """Print an error message from file:line and args and exit.""" > > @@ -119,6 +120,14 @@ def whexC(val): > > suffix = 'u' > > return whex(val) + suffix > > > > +def predicate(val): > > + """Return a string for calling a predicate function > > + (if specified, accepting 'None' as an indication > > + that no predicate is to be emitted) with the ctx > > + as a parameter.""" > > + if (val == None): > > + return '' > > + return ' && ' + val + '(ctx)' > > > > def str_match_bits(bits, mask): > > """Return a string pretty-printing BITS/MASK""" > > @@ -340,7 +349,7 @@ def output_def(self): > > > > class General: > > """Common code between instruction formats and instruction patterns""" > > - def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): > > + def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w, p = None): > > self.name = name > > self.file = input_file > > self.lineno = lineno > > @@ -351,6 +360,7 @@ def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): > > self.fieldmask = fldm > > self.fields = flds > > self.width = w > > + self.predicate = p > > > > def __str__(self): > > return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask) > > @@ -499,7 +509,7 @@ def output_code(self, i, extracted, outerbits, outermask): > > if outermask != p.fixedmask: > > innermask = p.fixedmask & ~outermask > > innerbits = p.fixedbits & ~outermask > > - output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n') > > + output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}{predicate(p.predicate)}) {{\n') > > output(ind, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n') > > p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask) > > output(ind, '}\n') > > @@ -826,6 +836,7 @@ def parse_generic(lineno, parent_pat, name, toks): > > global re_fld_ident > > global re_fmt_ident > > global re_C_ident > > + global re_predicate_ident > > global insnwidth > > global insnmask > > global variablewidth > > @@ -839,6 +850,7 @@ def parse_generic(lineno, parent_pat, name, toks): > > flds = {} > > arg = None > > fmt = None > > + predicate = None > > for t in toks: > > # '&Foo' gives a format an explicit argument set. > > if re.fullmatch(re_arg_ident, t): > > @@ -881,6 +893,12 @@ def parse_generic(lineno, parent_pat, name, toks): > > flds = add_field(lineno, flds, fname, ConstField(value)) > > continue > > > > + # '|predicate' sets a predicate function to be called. > > + if re.fullmatch(re_predicate_ident, t): > > + tt = t[1:] > > + predicate = tt; > > + continue > > + > > # Pattern of 0s, 1s, dots and dashes indicate required zeros, > > # required ones, or dont-cares. > > if re.fullmatch('[01.-]+', t): > > @@ -979,7 +997,7 @@ def parse_generic(lineno, parent_pat, name, toks): > > if f not in flds.keys() and f not in fmt.fields.keys(): > > error(lineno, f'field {f} not initialized') > > pat = Pattern(name, lineno, fmt, fixedbits, fixedmask, > > - undefmask, fieldmask, flds, width) > > + undefmask, fieldmask, flds, width, predicate) > > parent_pat.pats.append(pat) > > allpatterns.append(pat) > >
Philippe, Assuming that we move into this direction for the vendor-extensions, I would like to also see some of the other conditionally available extensions in RISC-V to move to predicated decode (which would move this mechanism past overlapping multi-patterns). E.g., we have the following clumsiness for the Zb[abcs] extensions: #define REQUIRE_ZBB(ctx) do { \ > if (!RISCV_CPU(ctx->cs)->cfg.ext_zbb) { \ > return false; \ > } \ > } while (0) > static bool trans_clz(DisasContext *ctx, arg_clz *a) > { > REQUIRE_ZBB(ctx); > return gen_unary_per_ol(ctx, a, EXT_NONE, gen_clz, gen_clzw); > } Besides being easier to express in the table (as "|has_zbb_p"), this will have no performance impact (as the predicate is otherwise evaluated every time the trans_* function is invoked). I intentionally did not include this at this stage, as the relative benefit here is small, and changing decodetree.py for it is unjustified. If (however) we have the mechanism in decodetree.py to support the custom/vendor-defined extensions, this is a natural and obvious next step, though. Philipp. On Mon, 10 Jan 2022 at 10:52, Philipp Tomsich <philipp.tomsich@vrull.eu> wrote: > For RISC-V the opcode decode will change between different vendor > implementations of RISC-V (emulated by the same qemu binary). > Any two vendors may reuse the same opcode space, e.g., we may end up with: > > # *** RV64 Custom-3 Extension *** > { > vt_maskc 0000000 ..... ..... 110 ..... 1111011 @r > |has_xventanacondops_p > vt_maskcn 0000000 ..... ..... 111 ..... 1111011 @r > |has_xventanacondops_p > someone_something ............ ..... 000 ..... 1100111 @i > |has_xsomeonesomething_p > } > > With extensions being enabled either from the commandline > -cpu any,xventanacondops=true > or possibly even having a AMP in one emulation setup (e.g. application > cores having one extension and power-mangement cores having a > different one — or even a conflicting one). > > Cheers, > Philipp. > > > On Mon, 10 Jan 2022 at 10:43, Philippe Mathieu-Daudé <f4bug@amsat.org> > wrote: > > > > Hi Philipp, > > > > On 1/9/22 21:56, Philipp Tomsich wrote: > > > This adds the possibility to specify a predicate-function that is > > > called as part of decoding in multi-patterns; it is intended for > > > use-cases (such as vendor-defined instructions in RISC-V) where the > > > same bitpattern may decode into different functions depending on the > > > overall configuration of the emulation target. > > > > But for a particular CPU, its "vendor ISAs" won't change at runtime. > > > > Since we know this at build time, I don't understand why you need > > predicate support at all. > > > > > > > > At this time, we only support predicates for multi-patterns. > > > > > > Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu> > > > > > > --- > > > > > > docs/devel/decodetree.rst | 7 ++++++- > > > scripts/decodetree.py | 24 +++++++++++++++++++++--- > > > 2 files changed, 27 insertions(+), 4 deletions(-) > > > > > > diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst > > > index 49ea50c2a7..241aaec8bb 100644 > > > --- a/docs/devel/decodetree.rst > > > +++ b/docs/devel/decodetree.rst > > > @@ -144,9 +144,10 @@ Patterns > > > Syntax:: > > > > > > pat_def := identifier ( pat_elt )+ > > > - pat_elt := fixedbit_elt | field_elt | field_ref | args_ref | > fmt_ref | const_elt > > > + pat_elt := fixedbit_elt | field_elt | field_ref | args_ref | > fmt_ref | const_elt | predicate > > > fmt_ref := '@' identifier > > > const_elt := identifier '=' number > > > + predicate := '|' identifier > > > > > > The *fixedbit_elt* and *field_elt* specifiers are unchanged from > formats. > > > A pattern that does not specify a named format will have one inferred > > > @@ -156,6 +157,10 @@ A *const_elt* allows a argument to be set to a > constant value. This may > > > come in handy when fields overlap between patterns and one has to > > > include the values in the *fixedbit_elt* instead. > > > > > > +A *predicate* allows to specify a predicate function (returing true or > > > +false) to determine the applicability of the pattern. Currently, this > > > +will change the decode-behaviour for overlapping multi-patterns only. > > > + > > > The decoder will call a translator function for each pattern matched. > > > > > > Pattern examples:: > > > diff --git a/scripts/decodetree.py b/scripts/decodetree.py > > > index a03dc6b5e3..7da2282411 100644 > > > --- a/scripts/decodetree.py > > > +++ b/scripts/decodetree.py > > > @@ -52,6 +52,7 @@ > > > re_fld_ident = '%[a-zA-Z0-9_]*' > > > re_fmt_ident = '@[a-zA-Z0-9_]*' > > > re_pat_ident = '[a-zA-Z0-9_]*' > > > +re_predicate_ident = '\|[a-zA-Z_][a-zA-Z0-9_]*' > > > > > > def error_with_file(file, lineno, *args): > > > """Print an error message from file:line and args and exit.""" > > > @@ -119,6 +120,14 @@ def whexC(val): > > > suffix = 'u' > > > return whex(val) + suffix > > > > > > +def predicate(val): > > > + """Return a string for calling a predicate function > > > + (if specified, accepting 'None' as an indication > > > + that no predicate is to be emitted) with the ctx > > > + as a parameter.""" > > > + if (val == None): > > > + return '' > > > + return ' && ' + val + '(ctx)' > > > > > > def str_match_bits(bits, mask): > > > """Return a string pretty-printing BITS/MASK""" > > > @@ -340,7 +349,7 @@ def output_def(self): > > > > > > class General: > > > """Common code between instruction formats and instruction > patterns""" > > > - def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, > flds, w): > > > + def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, > flds, w, p = None): > > > self.name = name > > > self.file = input_file > > > self.lineno = lineno > > > @@ -351,6 +360,7 @@ def __init__(self, name, lineno, base, fixb, fixm, > udfm, fldm, flds, w): > > > self.fieldmask = fldm > > > self.fields = flds > > > self.width = w > > > + self.predicate = p > > > > > > def __str__(self): > > > return self.name + ' ' + str_match_bits(self.fixedbits, > self.fixedmask) > > > @@ -499,7 +509,7 @@ def output_code(self, i, extracted, outerbits, > outermask): > > > if outermask != p.fixedmask: > > > innermask = p.fixedmask & ~outermask > > > innerbits = p.fixedbits & ~outermask > > > - output(ind, f'if ((insn & {whexC(innermask)}) == > {whexC(innerbits)}) {{\n') > > > + output(ind, f'if ((insn & {whexC(innermask)}) == > {whexC(innerbits)}{predicate(p.predicate)}) {{\n') > > > output(ind, f' /* {str_match_bits(p.fixedbits, > p.fixedmask)} */\n') > > > p.output_code(i + 4, extracted, p.fixedbits, > p.fixedmask) > > > output(ind, '}\n') > > > @@ -826,6 +836,7 @@ def parse_generic(lineno, parent_pat, name, toks): > > > global re_fld_ident > > > global re_fmt_ident > > > global re_C_ident > > > + global re_predicate_ident > > > global insnwidth > > > global insnmask > > > global variablewidth > > > @@ -839,6 +850,7 @@ def parse_generic(lineno, parent_pat, name, toks): > > > flds = {} > > > arg = None > > > fmt = None > > > + predicate = None > > > for t in toks: > > > # '&Foo' gives a format an explicit argument set. > > > if re.fullmatch(re_arg_ident, t): > > > @@ -881,6 +893,12 @@ def parse_generic(lineno, parent_pat, name, toks): > > > flds = add_field(lineno, flds, fname, ConstField(value)) > > > continue > > > > > > + # '|predicate' sets a predicate function to be called. > > > + if re.fullmatch(re_predicate_ident, t): > > > + tt = t[1:] > > > + predicate = tt; > > > + continue > > > + > > > # Pattern of 0s, 1s, dots and dashes indicate required zeros, > > > # required ones, or dont-cares. > > > if re.fullmatch('[01.-]+', t): > > > @@ -979,7 +997,7 @@ def parse_generic(lineno, parent_pat, name, toks): > > > if f not in flds.keys() and f not in fmt.fields.keys(): > > > error(lineno, f'field {f} not initialized') > > > pat = Pattern(name, lineno, fmt, fixedbits, fixedmask, > > > - undefmask, fieldmask, flds, width) > > > + undefmask, fieldmask, flds, width, predicate) > > > parent_pat.pats.append(pat) > > > allpatterns.append(pat) > > > >
On 1/10/22 10:52, Philipp Tomsich wrote: > For RISC-V the opcode decode will change between different vendor > implementations of RISC-V (emulated by the same qemu binary). > Any two vendors may reuse the same opcode space, e.g., we may end up with: > > # *** RV64 Custom-3 Extension *** > { > vt_maskc 0000000 ..... ..... 110 ..... 1111011 @r |has_xventanacondops_p > vt_maskcn 0000000 ..... ..... 111 ..... 1111011 @r |has_xventanacondops_p > someone_something ............ ..... 000 ..... 1100111 @i > |has_xsomeonesomething_p > } > > With extensions being enabled either from the commandline > -cpu any,xventanacondops=true > or possibly even having a AMP in one emulation setup (e.g. application > cores having one extension and power-mangement cores having a > different one — or even a conflicting one). I understand, I think this is what MIPS does, see commit 9d005392390: ("target/mips: Introduce decodetree structure for NEC Vr54xx extension") > > Cheers, > Philipp. > > > On Mon, 10 Jan 2022 at 10:43, Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: >> >> Hi Philipp, >> >> On 1/9/22 21:56, Philipp Tomsich wrote: >>> This adds the possibility to specify a predicate-function that is >>> called as part of decoding in multi-patterns; it is intended for >>> use-cases (such as vendor-defined instructions in RISC-V) where the >>> same bitpattern may decode into different functions depending on the >>> overall configuration of the emulation target. >> >> But for a particular CPU, its "vendor ISAs" won't change at runtime. >> >> Since we know this at build time, I don't understand why you need >> predicate support at all. >> >>> >>> At this time, we only support predicates for multi-patterns. >>> >>> Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu> >>> >>> --- >>> >>> docs/devel/decodetree.rst | 7 ++++++- >>> scripts/decodetree.py | 24 +++++++++++++++++++++--- >>> 2 files changed, 27 insertions(+), 4 deletions(-) >>> >>> diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst >>> index 49ea50c2a7..241aaec8bb 100644 >>> --- a/docs/devel/decodetree.rst >>> +++ b/docs/devel/decodetree.rst >>> @@ -144,9 +144,10 @@ Patterns >>> Syntax:: >>> >>> pat_def := identifier ( pat_elt )+ >>> - pat_elt := fixedbit_elt | field_elt | field_ref | args_ref | fmt_ref | const_elt >>> + pat_elt := fixedbit_elt | field_elt | field_ref | args_ref | fmt_ref | const_elt | predicate >>> fmt_ref := '@' identifier >>> const_elt := identifier '=' number >>> + predicate := '|' identifier >>> >>> The *fixedbit_elt* and *field_elt* specifiers are unchanged from formats. >>> A pattern that does not specify a named format will have one inferred >>> @@ -156,6 +157,10 @@ A *const_elt* allows a argument to be set to a constant value. This may >>> come in handy when fields overlap between patterns and one has to >>> include the values in the *fixedbit_elt* instead. >>> >>> +A *predicate* allows to specify a predicate function (returing true or >>> +false) to determine the applicability of the pattern. Currently, this >>> +will change the decode-behaviour for overlapping multi-patterns only. >>> + >>> The decoder will call a translator function for each pattern matched. >>> >>> Pattern examples:: >>> diff --git a/scripts/decodetree.py b/scripts/decodetree.py >>> index a03dc6b5e3..7da2282411 100644 >>> --- a/scripts/decodetree.py >>> +++ b/scripts/decodetree.py >>> @@ -52,6 +52,7 @@ >>> re_fld_ident = '%[a-zA-Z0-9_]*' >>> re_fmt_ident = '@[a-zA-Z0-9_]*' >>> re_pat_ident = '[a-zA-Z0-9_]*' >>> +re_predicate_ident = '\|[a-zA-Z_][a-zA-Z0-9_]*' >>> >>> def error_with_file(file, lineno, *args): >>> """Print an error message from file:line and args and exit.""" >>> @@ -119,6 +120,14 @@ def whexC(val): >>> suffix = 'u' >>> return whex(val) + suffix >>> >>> +def predicate(val): >>> + """Return a string for calling a predicate function >>> + (if specified, accepting 'None' as an indication >>> + that no predicate is to be emitted) with the ctx >>> + as a parameter.""" >>> + if (val == None): >>> + return '' >>> + return ' && ' + val + '(ctx)' >>> >>> def str_match_bits(bits, mask): >>> """Return a string pretty-printing BITS/MASK""" >>> @@ -340,7 +349,7 @@ def output_def(self): >>> >>> class General: >>> """Common code between instruction formats and instruction patterns""" >>> - def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): >>> + def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w, p = None): >>> self.name = name >>> self.file = input_file >>> self.lineno = lineno >>> @@ -351,6 +360,7 @@ def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): >>> self.fieldmask = fldm >>> self.fields = flds >>> self.width = w >>> + self.predicate = p >>> >>> def __str__(self): >>> return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask) >>> @@ -499,7 +509,7 @@ def output_code(self, i, extracted, outerbits, outermask): >>> if outermask != p.fixedmask: >>> innermask = p.fixedmask & ~outermask >>> innerbits = p.fixedbits & ~outermask >>> - output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n') >>> + output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}{predicate(p.predicate)}) {{\n') >>> output(ind, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n') >>> p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask) >>> output(ind, '}\n') >>> @@ -826,6 +836,7 @@ def parse_generic(lineno, parent_pat, name, toks): >>> global re_fld_ident >>> global re_fmt_ident >>> global re_C_ident >>> + global re_predicate_ident >>> global insnwidth >>> global insnmask >>> global variablewidth >>> @@ -839,6 +850,7 @@ def parse_generic(lineno, parent_pat, name, toks): >>> flds = {} >>> arg = None >>> fmt = None >>> + predicate = None >>> for t in toks: >>> # '&Foo' gives a format an explicit argument set. >>> if re.fullmatch(re_arg_ident, t): >>> @@ -881,6 +893,12 @@ def parse_generic(lineno, parent_pat, name, toks): >>> flds = add_field(lineno, flds, fname, ConstField(value)) >>> continue >>> >>> + # '|predicate' sets a predicate function to be called. >>> + if re.fullmatch(re_predicate_ident, t): >>> + tt = t[1:] >>> + predicate = tt; >>> + continue >>> + >>> # Pattern of 0s, 1s, dots and dashes indicate required zeros, >>> # required ones, or dont-cares. >>> if re.fullmatch('[01.-]+', t): >>> @@ -979,7 +997,7 @@ def parse_generic(lineno, parent_pat, name, toks): >>> if f not in flds.keys() and f not in fmt.fields.keys(): >>> error(lineno, f'field {f} not initialized') >>> pat = Pattern(name, lineno, fmt, fixedbits, fixedmask, >>> - undefmask, fieldmask, flds, width) >>> + undefmask, fieldmask, flds, width, predicate) >>> parent_pat.pats.append(pat) >>> allpatterns.append(pat) >>>
On Mon, 10 Jan 2022 at 11:03, Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: > On 1/10/22 10:52, Philipp Tomsich wrote: > > For RISC-V the opcode decode will change between different vendor > > implementations of RISC-V (emulated by the same qemu binary). > > Any two vendors may reuse the same opcode space, e.g., we may end up > with: > > > > # *** RV64 Custom-3 Extension *** > > { > > vt_maskc 0000000 ..... ..... 110 ..... 1111011 @r > |has_xventanacondops_p > > vt_maskcn 0000000 ..... ..... 111 ..... 1111011 @r > |has_xventanacondops_p > > someone_something ............ ..... 000 ..... 1100111 @i > > |has_xsomeonesomething_p > > } > > > > With extensions being enabled either from the commandline > > -cpu any,xventanacondops=true > > or possibly even having a AMP in one emulation setup (e.g. application > > cores having one extension and power-mangement cores having a > > different one — or even a conflicting one). > > I understand, I think this is what MIPS does, see commit 9d005392390: > ("target/mips: Introduce decodetree structure for NEC Vr54xx extension") > The MIPS implementation is functionally equivalent, and I could see us doing something similar for RISC-V (although I would strongly prefer to make everything explicit via the .decode-file instead of relying on people being aware of the logic in decode_op). With the growing number of optional extensions (as of today, at least the Zb[abcs] and vector comes to mind), we would end up with a large number of decode-files that will then need to be sequentially called from decode_op(). The predicates can then move up into decode_op, predicting the auto-generated decoders from being invoked. As of today, we have predicates for at least the following: - Zb[abcs] - Vectors As long as we are in greenfield territory (i.e. not dealing with HINT-instructions that overlap existing opcode space), this will be fine and provide proper isolation between the .decode-tables. However, as soon as we need to implement something along the lines (I know this is a bad example, as prefetching will be a no-op on qemu) of: { { # *** RV32 Zicbop Sandard Extension (hints in the ori-space) *** prefetch_i ....... 00000 ..... 110 00000 0010011 @cbo_pref prefetch_r ....... 00001 ..... 110 00000 0010011 @cbo_pref prefetch_w ....... 00011 ..... 110 00000 0010011 @cbo_pref } ori ............ ..... 110 ..... 0010011 @i } we'd need to make sure that the generated decoders are called in the appropriate order (i.e. the decoder for the specialized instructions will need to be called first), which would not be apparent from looking at the individual .decode files. Let me know what direction we want to take (of course, I have a bias towards the one in the patch). Philipp.
On 1/10/22 12:11, Philipp Tomsich wrote: > On Mon, 10 Jan 2022 at 11:03, Philippe Mathieu-Daudé <f4bug@amsat.org > <mailto:f4bug@amsat.org>> wrote: > > On 1/10/22 10:52, Philipp Tomsich wrote: > > For RISC-V the opcode decode will change between different vendor > > implementations of RISC-V (emulated by the same qemu binary). > > Any two vendors may reuse the same opcode space, e.g., we may end > up with: > > > > # *** RV64 Custom-3 Extension *** > > { > > vt_maskc 0000000 ..... ..... 110 ..... 1111011 @r > |has_xventanacondops_p > > vt_maskcn 0000000 ..... ..... 111 ..... 1111011 @r > |has_xventanacondops_p > > someone_something ............ ..... 000 ..... 1100111 @i > > |has_xsomeonesomething_p > > } > > > > With extensions being enabled either from the commandline > > -cpu any,xventanacondops=true > > or possibly even having a AMP in one emulation setup (e.g. application > > cores having one extension and power-mangement cores having a > > different one — or even a conflicting one). > > I understand, I think this is what MIPS does, see commit 9d005392390: > ("target/mips: Introduce decodetree structure for NEC Vr54xx extension") > > > The MIPS implementation is functionally equivalent, and I could see us > doing something similar for RISC-V (although I would strongly prefer to > make everything explicit via the .decode-file instead of relying on > people being aware of the logic in decode_op). > > With the growing number of optional extensions (as of today, at least > the Zb[abcs] and vector comes to mind), we would end up with a large > number of decode-files that will then need to be sequentially called > from decode_op(). The predicates can then move up into decode_op, > predicting the auto-generated decoders from being invoked. > > As of today, we have predicates for at least the following: > > * Zb[abcs] > * Vectors > > As long as we are in greenfield territory (i.e. not dealing with > HINT-instructions that overlap existing opcode space), this will be fine > and provide proper isolation between the .decode-tables. > However, as soon as we need to implement something along the lines (I > know this is a bad example, as prefetching will be a no-op on qemu) of: > > { > { > # *** RV32 Zicbop Sandard Extension (hints in the ori-space) *** > prefetch_i ....... 00000 ..... 110 00000 0010011 @cbo_pref > prefetch_r ....... 00001 ..... 110 00000 0010011 @cbo_pref > prefetch_w ....... 00011 ..... 110 00000 0010011 @cbo_pref > } > ori ............ ..... 110 ..... 0010011 @i > } > > we'd need to make sure that the generated decoders are called in the > appropriate order (i.e. the decoder for the specialized instructions > will need to be called first), which would not be apparent from looking > at the individual .decode files. > > Let me know what direction we want to take (of course, I have a bias > towards the one in the patch). I can't say for RISCV performance, I am myself biased toward maintenance where having one compilation unit per (vendor) extension. ARM and MIPS seems to go in this direction, while PPC and RISCV in the other one.
Alistair, Do you (and the other RISC-V custodians of target/riscv) have any opinion on this? We can go either way — and it boils down to a style and process question. Thanks, Philipp. On Mon, 10 Jan 2022 at 12:30, Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: > On 1/10/22 12:11, Philipp Tomsich wrote: > > On Mon, 10 Jan 2022 at 11:03, Philippe Mathieu-Daudé <f4bug@amsat.org > > <mailto:f4bug@amsat.org>> wrote: > > > > On 1/10/22 10:52, Philipp Tomsich wrote: > > > For RISC-V the opcode decode will change between different vendor > > > implementations of RISC-V (emulated by the same qemu binary). > > > Any two vendors may reuse the same opcode space, e.g., we may end > > up with: > > > > > > # *** RV64 Custom-3 Extension *** > > > { > > > vt_maskc 0000000 ..... ..... 110 ..... 1111011 @r > > |has_xventanacondops_p > > > vt_maskcn 0000000 ..... ..... 111 ..... 1111011 @r > > |has_xventanacondops_p > > > someone_something ............ ..... 000 ..... 1100111 @i > > > |has_xsomeonesomething_p > > > } > > > > > > With extensions being enabled either from the commandline > > > -cpu any,xventanacondops=true > > > or possibly even having a AMP in one emulation setup (e.g. > application > > > cores having one extension and power-mangement cores having a > > > different one — or even a conflicting one). > > > > I understand, I think this is what MIPS does, see commit 9d005392390: > > ("target/mips: Introduce decodetree structure for NEC Vr54xx > extension") > > > > > > The MIPS implementation is functionally equivalent, and I could see us > > doing something similar for RISC-V (although I would strongly prefer to > > make everything explicit via the .decode-file instead of relying on > > people being aware of the logic in decode_op). > > > > With the growing number of optional extensions (as of today, at least > > the Zb[abcs] and vector comes to mind), we would end up with a large > > number of decode-files that will then need to be sequentially called > > from decode_op(). The predicates can then move up into decode_op, > > predicting the auto-generated decoders from being invoked. > > > > As of today, we have predicates for at least the following: > > > > * Zb[abcs] > > * Vectors > > > > As long as we are in greenfield territory (i.e. not dealing with > > HINT-instructions that overlap existing opcode space), this will be fine > > and provide proper isolation between the .decode-tables. > > However, as soon as we need to implement something along the lines (I > > know this is a bad example, as prefetching will be a no-op on qemu) of: > > > > { > > { > > # *** RV32 Zicbop Sandard Extension (hints in the ori-space) *** > > prefetch_i ....... 00000 ..... 110 00000 0010011 @cbo_pref > > prefetch_r ....... 00001 ..... 110 00000 0010011 @cbo_pref > > prefetch_w ....... 00011 ..... 110 00000 0010011 @cbo_pref > > } > > ori ............ ..... 110 ..... 0010011 @i > > } > > > > we'd need to make sure that the generated decoders are called in the > > appropriate order (i.e. the decoder for the specialized instructions > > will need to be called first), which would not be apparent from looking > > at the individual .decode files. > > > > Let me know what direction we want to take (of course, I have a bias > > towards the one in the patch). > > I can't say for RISCV performance, I am myself biased toward maintenance > where having one compilation unit per (vendor) extension. > ARM and MIPS seems to go in this direction, while PPC and RISCV in the > other one. >
On Thu, Jan 13, 2022 at 1:42 AM Philipp Tomsich <philipp.tomsich@vrull.eu> wrote: > > Alistair, > > Do you (and the other RISC-V custodians of target/riscv) have any opinion on this? > We can go either way — and it boils down to a style and process question. Sorry, it's a busy week! I had a quick look over this series and left some comments below. > > Thanks, > Philipp. > > On Mon, 10 Jan 2022 at 12:30, Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: >> >> On 1/10/22 12:11, Philipp Tomsich wrote: >> > On Mon, 10 Jan 2022 at 11:03, Philippe Mathieu-Daudé <f4bug@amsat.org >> > <mailto:f4bug@amsat.org>> wrote: >> > >> > On 1/10/22 10:52, Philipp Tomsich wrote: >> > > For RISC-V the opcode decode will change between different vendor >> > > implementations of RISC-V (emulated by the same qemu binary). >> > > Any two vendors may reuse the same opcode space, e.g., we may end >> > up with: >> > > >> > > # *** RV64 Custom-3 Extension *** >> > > { >> > > vt_maskc 0000000 ..... ..... 110 ..... 1111011 @r >> > |has_xventanacondops_p >> > > vt_maskcn 0000000 ..... ..... 111 ..... 1111011 @r >> > |has_xventanacondops_p >> > > someone_something ............ ..... 000 ..... 1100111 @i >> > > |has_xsomeonesomething_p >> > > } I don't love this. If even a few vendors use this we could have a huge number of instructions here >> > > >> > > With extensions being enabled either from the commandline >> > > -cpu any,xventanacondops=true >> > > or possibly even having a AMP in one emulation setup (e.g. application >> > > cores having one extension and power-mangement cores having a >> > > different one — or even a conflicting one). Agreed, an AMP configuration is entirely possible. >> > >> > I understand, I think this is what MIPS does, see commit 9d005392390: >> > ("target/mips: Introduce decodetree structure for NEC Vr54xx extension") >> > >> > >> > The MIPS implementation is functionally equivalent, and I could see us >> > doing something similar for RISC-V (although I would strongly prefer to >> > make everything explicit via the .decode-file instead of relying on >> > people being aware of the logic in decode_op). >> > >> > With the growing number of optional extensions (as of today, at least >> > the Zb[abcs] and vector comes to mind), we would end up with a large >> > number of decode-files that will then need to be sequentially called >> > from decode_op(). The predicates can then move up into decode_op, >> > predicting the auto-generated decoders from being invoked. >> > >> > As of today, we have predicates for at least the following: >> > >> > * Zb[abcs] >> > * Vectors I see your point, having a long list of decode_*() functions to call is a hassle. On the other hand having thousands of lines in insn32.decode is also a pain. In saying that, having official RISC-V extensions in insn32.decode and vendor instructions in insn<vendor>.decode seems like a reasonable compromise. Maybe even large extensions (vector maybe?) could have their own insn<extension>.decode file, on a case by case basis. >> > >> > As long as we are in greenfield territory (i.e. not dealing with >> > HINT-instructions that overlap existing opcode space), this will be fine >> > and provide proper isolation between the .decode-tables. >> > However, as soon as we need to implement something along the lines (I >> > know this is a bad example, as prefetching will be a no-op on qemu) of: >> > >> > { >> > { >> > # *** RV32 Zicbop Sandard Extension (hints in the ori-space) *** >> > prefetch_i ....... 00000 ..... 110 00000 0010011 @cbo_pref >> > prefetch_r ....... 00001 ..... 110 00000 0010011 @cbo_pref >> > prefetch_w ....... 00011 ..... 110 00000 0010011 @cbo_pref >> > } >> > ori ............ ..... 110 ..... 0010011 @i >> > } >> > >> > we'd need to make sure that the generated decoders are called in the >> > appropriate order (i.e. the decoder for the specialized instructions >> > will need to be called first), which would not be apparent from looking >> > at the individual .decode files. >> > >> > Let me know what direction we want to take (of course, I have a bias >> > towards the one in the patch). >> >> I can't say for RISCV performance, I am myself biased toward maintenance >> where having one compilation unit per (vendor) extension. >> ARM and MIPS seems to go in this direction, while PPC and RISCV in the >> other one. I think we could do both right? We could add the predicate support, but also isolate vendors to their own decode file So for example, for vendor abc +++ b/target/riscv/insnabc.decode +# *** Custom abc Extension *** +{ + vt_maskc 0000000 ..... ..... 110 ..... 1111011 @r |has_abc_c + vt_maskcn 0000000 ..... ..... 111 ..... 1111011 @r |has_abc_d +} Then there is a decode_abc(), but we support extension abc_c and abc_d That way we can keep all the vendor code seperate, while still allowing flexibility. Will that work? We can also then use predicate support for the standard RISC-V extensions as described by Philipp Alistair
On Thu, 13 Jan 2022 at 06:07, Alistair Francis <alistair23@gmail.com> wrote: > > On Thu, Jan 13, 2022 at 1:42 AM Philipp Tomsich > <philipp.tomsich@vrull.eu> wrote: > > > > Alistair, > > > > Do you (and the other RISC-V custodians of target/riscv) have any opinion on this? > > We can go either way — and it boils down to a style and process question. > > Sorry, it's a busy week! > > I had a quick look over this series and left some comments below. Thank you for taking the time despite the busy week — I can absolutely relate, as it seems that January is picking up right where December left off ;-) > > > > > Thanks, > > Philipp. > > > > On Mon, 10 Jan 2022 at 12:30, Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: > >> > >> On 1/10/22 12:11, Philipp Tomsich wrote: > >> > On Mon, 10 Jan 2022 at 11:03, Philippe Mathieu-Daudé <f4bug@amsat.org > >> > <mailto:f4bug@amsat.org>> wrote: > >> > > >> > On 1/10/22 10:52, Philipp Tomsich wrote: > >> > > For RISC-V the opcode decode will change between different vendor > >> > > implementations of RISC-V (emulated by the same qemu binary). > >> > > Any two vendors may reuse the same opcode space, e.g., we may end > >> > up with: > >> > > > >> > > # *** RV64 Custom-3 Extension *** > >> > > { > >> > > vt_maskc 0000000 ..... ..... 110 ..... 1111011 @r > >> > |has_xventanacondops_p > >> > > vt_maskcn 0000000 ..... ..... 111 ..... 1111011 @r > >> > |has_xventanacondops_p > >> > > someone_something ............ ..... 000 ..... 1100111 @i > >> > > |has_xsomeonesomething_p > >> > > } > > I don't love this. If even a few vendors use this we could have a huge > number of instructions here > > >> > > > >> > > With extensions being enabled either from the commandline > >> > > -cpu any,xventanacondops=true > >> > > or possibly even having a AMP in one emulation setup (e.g. application > >> > > cores having one extension and power-mangement cores having a > >> > > different one — or even a conflicting one). > > Agreed, an AMP configuration is entirely possible. > > >> > > >> > I understand, I think this is what MIPS does, see commit 9d005392390: > >> > ("target/mips: Introduce decodetree structure for NEC Vr54xx extension") > >> > > >> > > >> > The MIPS implementation is functionally equivalent, and I could see us > >> > doing something similar for RISC-V (although I would strongly prefer to > >> > make everything explicit via the .decode-file instead of relying on > >> > people being aware of the logic in decode_op). > >> > > >> > With the growing number of optional extensions (as of today, at least > >> > the Zb[abcs] and vector comes to mind), we would end up with a large > >> > number of decode-files that will then need to be sequentially called > >> > from decode_op(). The predicates can then move up into decode_op, > >> > predicting the auto-generated decoders from being invoked. > >> > > >> > As of today, we have predicates for at least the following: > >> > > >> > * Zb[abcs] > >> > * Vectors > > I see your point, having a long list of decode_*() functions to call > is a hassle. On the other hand having thousands of lines in > insn32.decode is also a pain. > > In saying that, having official RISC-V extensions in insn32.decode and > vendor instructions in insn<vendor>.decode seems like a reasonable > compromise. Maybe even large extensions (vector maybe?) could have > their own insn<extension>.decode file, on a case by case basis. > > >> > > >> > As long as we are in greenfield territory (i.e. not dealing with > >> > HINT-instructions that overlap existing opcode space), this will be fine > >> > and provide proper isolation between the .decode-tables. > >> > However, as soon as we need to implement something along the lines (I > >> > know this is a bad example, as prefetching will be a no-op on qemu) of: > >> > > >> > { > >> > { > >> > # *** RV32 Zicbop Sandard Extension (hints in the ori-space) *** > >> > prefetch_i ....... 00000 ..... 110 00000 0010011 @cbo_pref > >> > prefetch_r ....... 00001 ..... 110 00000 0010011 @cbo_pref > >> > prefetch_w ....... 00011 ..... 110 00000 0010011 @cbo_pref > >> > } > >> > ori ............ ..... 110 ..... 0010011 @i > >> > } > >> > > >> > we'd need to make sure that the generated decoders are called in the > >> > appropriate order (i.e. the decoder for the specialized instructions > >> > will need to be called first), which would not be apparent from looking > >> > at the individual .decode files. > >> > > >> > Let me know what direction we want to take (of course, I have a bias > >> > towards the one in the patch). > >> > >> I can't say for RISCV performance, I am myself biased toward maintenance > >> where having one compilation unit per (vendor) extension. > >> ARM and MIPS seems to go in this direction, while PPC and RISCV in the > >> other one. > > I think we could do both right? > > We could add the predicate support, but also isolate vendors to their > own decode file > > So for example, for vendor abc > > +++ b/target/riscv/insnabc.decode > +# *** Custom abc Extension *** > +{ > + vt_maskc 0000000 ..... ..... 110 ..... 1111011 @r |has_abc_c > + vt_maskcn 0000000 ..... ..... 111 ..... 1111011 @r |has_abc_d > +} > > Then there is a decode_abc(), but we support extension abc_c and abc_d > > That way we can keep all the vendor code seperate, while still > allowing flexibility. Will that work? I'll split this up into multiple series then: 1. XVentanaCondOps will use a separate decoder (so no decodetree changes in v2). 2. A new series that adds predicate support and uses it for Zb[abcs] 3. A third series that factors vector out of the insn32.decode and puts it into its own decoder. This will give us the chance to discuss individual design changes at their own speed. Philipp. > > We can also then use predicate support for the standard RISC-V > extensions as described by Philipp > > Alistair
On Thu, Jan 13, 2022 at 6:28 PM Philipp Tomsich <philipp.tomsich@vrull.eu> wrote: > > On Thu, 13 Jan 2022 at 06:07, Alistair Francis <alistair23@gmail.com> wrote: > > > > On Thu, Jan 13, 2022 at 1:42 AM Philipp Tomsich > > <philipp.tomsich@vrull.eu> wrote: > > > > > > Alistair, > > > > > > Do you (and the other RISC-V custodians of target/riscv) have any opinion on this? > > > We can go either way — and it boils down to a style and process question. > > > > Sorry, it's a busy week! > > > > I had a quick look over this series and left some comments below. > > > Thank you for taking the time despite the busy week — I can absolutely > relate, as it seems that January is picking up right where December > left off ;-) > > > > > > > > > Thanks, > > > Philipp. > > > > > > On Mon, 10 Jan 2022 at 12:30, Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: > > >> > > >> On 1/10/22 12:11, Philipp Tomsich wrote: > > >> > On Mon, 10 Jan 2022 at 11:03, Philippe Mathieu-Daudé <f4bug@amsat.org > > >> > <mailto:f4bug@amsat.org>> wrote: > > >> > > > >> > On 1/10/22 10:52, Philipp Tomsich wrote: > > >> > > For RISC-V the opcode decode will change between different vendor > > >> > > implementations of RISC-V (emulated by the same qemu binary). > > >> > > Any two vendors may reuse the same opcode space, e.g., we may end > > >> > up with: > > >> > > > > >> > > # *** RV64 Custom-3 Extension *** > > >> > > { > > >> > > vt_maskc 0000000 ..... ..... 110 ..... 1111011 @r > > >> > |has_xventanacondops_p > > >> > > vt_maskcn 0000000 ..... ..... 111 ..... 1111011 @r > > >> > |has_xventanacondops_p > > >> > > someone_something ............ ..... 000 ..... 1100111 @i > > >> > > |has_xsomeonesomething_p > > >> > > } > > > > I don't love this. If even a few vendors use this we could have a huge > > number of instructions here > > > > >> > > > > >> > > With extensions being enabled either from the commandline > > >> > > -cpu any,xventanacondops=true > > >> > > or possibly even having a AMP in one emulation setup (e.g. application > > >> > > cores having one extension and power-mangement cores having a > > >> > > different one — or even a conflicting one). > > > > Agreed, an AMP configuration is entirely possible. > > > > >> > > > >> > I understand, I think this is what MIPS does, see commit 9d005392390: > > >> > ("target/mips: Introduce decodetree structure for NEC Vr54xx extension") > > >> > > > >> > > > >> > The MIPS implementation is functionally equivalent, and I could see us > > >> > doing something similar for RISC-V (although I would strongly prefer to > > >> > make everything explicit via the .decode-file instead of relying on > > >> > people being aware of the logic in decode_op). > > >> > > > >> > With the growing number of optional extensions (as of today, at least > > >> > the Zb[abcs] and vector comes to mind), we would end up with a large > > >> > number of decode-files that will then need to be sequentially called > > >> > from decode_op(). The predicates can then move up into decode_op, > > >> > predicting the auto-generated decoders from being invoked. > > >> > > > >> > As of today, we have predicates for at least the following: > > >> > > > >> > * Zb[abcs] > > >> > * Vectors > > > > I see your point, having a long list of decode_*() functions to call > > is a hassle. On the other hand having thousands of lines in > > insn32.decode is also a pain. > > > > In saying that, having official RISC-V extensions in insn32.decode and > > vendor instructions in insn<vendor>.decode seems like a reasonable > > compromise. Maybe even large extensions (vector maybe?) could have > > their own insn<extension>.decode file, on a case by case basis. > > > > >> > > > >> > As long as we are in greenfield territory (i.e. not dealing with > > >> > HINT-instructions that overlap existing opcode space), this will be fine > > >> > and provide proper isolation between the .decode-tables. > > >> > However, as soon as we need to implement something along the lines (I > > >> > know this is a bad example, as prefetching will be a no-op on qemu) of: > > >> > > > >> > { > > >> > { > > >> > # *** RV32 Zicbop Sandard Extension (hints in the ori-space) *** > > >> > prefetch_i ....... 00000 ..... 110 00000 0010011 @cbo_pref > > >> > prefetch_r ....... 00001 ..... 110 00000 0010011 @cbo_pref > > >> > prefetch_w ....... 00011 ..... 110 00000 0010011 @cbo_pref > > >> > } > > >> > ori ............ ..... 110 ..... 0010011 @i > > >> > } > > >> > > > >> > we'd need to make sure that the generated decoders are called in the > > >> > appropriate order (i.e. the decoder for the specialized instructions > > >> > will need to be called first), which would not be apparent from looking > > >> > at the individual .decode files. > > >> > > > >> > Let me know what direction we want to take (of course, I have a bias > > >> > towards the one in the patch). > > >> > > >> I can't say for RISCV performance, I am myself biased toward maintenance > > >> where having one compilation unit per (vendor) extension. > > >> ARM and MIPS seems to go in this direction, while PPC and RISCV in the > > >> other one. > > > > I think we could do both right? > > > > We could add the predicate support, but also isolate vendors to their > > own decode file > > > > So for example, for vendor abc > > > > +++ b/target/riscv/insnabc.decode > > +# *** Custom abc Extension *** > > +{ > > + vt_maskc 0000000 ..... ..... 110 ..... 1111011 @r |has_abc_c > > + vt_maskcn 0000000 ..... ..... 111 ..... 1111011 @r |has_abc_d > > +} > > > > Then there is a decode_abc(), but we support extension abc_c and abc_d > > > > That way we can keep all the vendor code seperate, while still > > allowing flexibility. Will that work? > > I'll split this up into multiple series then: > 1. XVentanaCondOps will use a separate decoder (so no decodetree changes in v2). > 2. A new series that adds predicate support and uses it for Zb[abcs] > 3. A third series that factors vector out of the insn32.decode and > puts it into its own decoder. > > This will give us the chance to discuss individual design changes at > their own speed. Great! Thanks for that Alistair > > Philipp. > > > > > We can also then use predicate support for the standard RISC-V > > extensions as described by Philipp > > > > Alistair
diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst index 49ea50c2a7..241aaec8bb 100644 --- a/docs/devel/decodetree.rst +++ b/docs/devel/decodetree.rst @@ -144,9 +144,10 @@ Patterns Syntax:: pat_def := identifier ( pat_elt )+ - pat_elt := fixedbit_elt | field_elt | field_ref | args_ref | fmt_ref | const_elt + pat_elt := fixedbit_elt | field_elt | field_ref | args_ref | fmt_ref | const_elt | predicate fmt_ref := '@' identifier const_elt := identifier '=' number + predicate := '|' identifier The *fixedbit_elt* and *field_elt* specifiers are unchanged from formats. A pattern that does not specify a named format will have one inferred @@ -156,6 +157,10 @@ A *const_elt* allows a argument to be set to a constant value. This may come in handy when fields overlap between patterns and one has to include the values in the *fixedbit_elt* instead. +A *predicate* allows to specify a predicate function (returing true or +false) to determine the applicability of the pattern. Currently, this +will change the decode-behaviour for overlapping multi-patterns only. + The decoder will call a translator function for each pattern matched. Pattern examples:: diff --git a/scripts/decodetree.py b/scripts/decodetree.py index a03dc6b5e3..7da2282411 100644 --- a/scripts/decodetree.py +++ b/scripts/decodetree.py @@ -52,6 +52,7 @@ re_fld_ident = '%[a-zA-Z0-9_]*' re_fmt_ident = '@[a-zA-Z0-9_]*' re_pat_ident = '[a-zA-Z0-9_]*' +re_predicate_ident = '\|[a-zA-Z_][a-zA-Z0-9_]*' def error_with_file(file, lineno, *args): """Print an error message from file:line and args and exit.""" @@ -119,6 +120,14 @@ def whexC(val): suffix = 'u' return whex(val) + suffix +def predicate(val): + """Return a string for calling a predicate function + (if specified, accepting 'None' as an indication + that no predicate is to be emitted) with the ctx + as a parameter.""" + if (val == None): + return '' + return ' && ' + val + '(ctx)' def str_match_bits(bits, mask): """Return a string pretty-printing BITS/MASK""" @@ -340,7 +349,7 @@ def output_def(self): class General: """Common code between instruction formats and instruction patterns""" - def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): + def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w, p = None): self.name = name self.file = input_file self.lineno = lineno @@ -351,6 +360,7 @@ def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): self.fieldmask = fldm self.fields = flds self.width = w + self.predicate = p def __str__(self): return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask) @@ -499,7 +509,7 @@ def output_code(self, i, extracted, outerbits, outermask): if outermask != p.fixedmask: innermask = p.fixedmask & ~outermask innerbits = p.fixedbits & ~outermask - output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n') + output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}{predicate(p.predicate)}) {{\n') output(ind, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n') p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask) output(ind, '}\n') @@ -826,6 +836,7 @@ def parse_generic(lineno, parent_pat, name, toks): global re_fld_ident global re_fmt_ident global re_C_ident + global re_predicate_ident global insnwidth global insnmask global variablewidth @@ -839,6 +850,7 @@ def parse_generic(lineno, parent_pat, name, toks): flds = {} arg = None fmt = None + predicate = None for t in toks: # '&Foo' gives a format an explicit argument set. if re.fullmatch(re_arg_ident, t): @@ -881,6 +893,12 @@ def parse_generic(lineno, parent_pat, name, toks): flds = add_field(lineno, flds, fname, ConstField(value)) continue + # '|predicate' sets a predicate function to be called. + if re.fullmatch(re_predicate_ident, t): + tt = t[1:] + predicate = tt; + continue + # Pattern of 0s, 1s, dots and dashes indicate required zeros, # required ones, or dont-cares. if re.fullmatch('[01.-]+', t): @@ -979,7 +997,7 @@ def parse_generic(lineno, parent_pat, name, toks): if f not in flds.keys() and f not in fmt.fields.keys(): error(lineno, f'field {f} not initialized') pat = Pattern(name, lineno, fmt, fixedbits, fixedmask, - undefmask, fieldmask, flds, width) + undefmask, fieldmask, flds, width, predicate) parent_pat.pats.append(pat) allpatterns.append(pat)
This adds the possibility to specify a predicate-function that is called as part of decoding in multi-patterns; it is intended for use-cases (such as vendor-defined instructions in RISC-V) where the same bitpattern may decode into different functions depending on the overall configuration of the emulation target. At this time, we only support predicates for multi-patterns. Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu> --- docs/devel/decodetree.rst | 7 ++++++- scripts/decodetree.py | 24 +++++++++++++++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-)