[RFC/RFA,v2,01/12] Implement internal functions for efficient CRC computation

Message ID	CAE65F3MQRAuunjJOeawp4rDQqZCNPWyn+HP-72sxOLMDHjCFCQ@mail.gmail.com
State	New
Headers	show Return-Path: <gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org> DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org C06D63858D28 MIME-Version: 1.0 From: Mariam Arutunian <mariamarutunian@gmail.com> Date: Fri, 26 Jul 2024 22:05:28 +0400 Message-ID: <CAE65F3MQRAuunjJOeawp4rDQqZCNPWyn+HP-72sxOLMDHjCFCQ@mail.gmail.com> Subject: [RFC/RFA][PATCH v2 01/12] Implement internal functions for efficient CRC computation To: GCC Patches <gcc-patches@gcc.gnu.org> Content-Type: multipart/mixed; boundary="00000000000002dfb6061e2a5cc7" Precedence: list Errors-To: gcc-patches-bounces~incoming=patchwork.ozlabs.org@gcc.gnu.org
Series	[RFC/RFA,v2,01/12] Implement internal functions for efficient CRC computation \| expand [RFC/RFA,v2,01/12] Implement internal functions for efficient CRC computation [RFC/RFA,v2,02/12] Add built-ins and tests for bit-forward and bit-reversed CRCs [RFC/RFA,v2,03/12] RISC-V: Add CRC expander to generate faster CRC. [RFC/RFA,v2,05/12] i386: Implement new expander for efficient CRC computation [RFC/RFA,v2,12/12] Add tests for CRC detection and generation.

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 5730bda80dc..be68ef860f9 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -8557,6 +8557,20 @@ operand 2, greater than operand 2 or is unordered with operand 2.
 
 This pattern is not allowed to @code{FAIL}.
 
+@cindex @code{crc@var{m}@var{n}4} instruction pattern
+@item @samp{crc@var{m}@var{n}4}
+Calculate a bit-forward CRC using operands 1, 2 and 3,
+then store the result in operand 0.
+Operand 1 is the initial CRC, operand 2 is the data and operand 3 is the
+polynomial without leading 1.
+Operands 0, 1 and 3 have mode @var{n} and operand 2 has mode @var{m}, where
+both modes are integers.  The size of CRC to be calculated is determined by the
+mode; for example, if @var{n} is 'hi', a CRC16 is calculated.
+
+@cindex @code{crc_rev@var{m}@var{n}4} instruction pattern
+@item @samp{crc_rev@var{m}@var{n}4}
+Similar to @samp{crc@var{m}@var{n}4}, but calculates a bit-reversed CRC.
+
 @end table
 
 @end ifset
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 1baa39b98eb..c9a049aeecc 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -14091,3 +14091,374 @@ int_expr_size (const_tree exp)
 
   return tree_to_shwi (size);
 }
+
+/* Calculate CRC for the initial CRC and given POLYNOMIAL.
+   CRC_BITS is CRC size.  */
+
+static unsigned HOST_WIDE_INT
+calculate_crc (unsigned HOST_WIDE_INT crc,
+               unsigned HOST_WIDE_INT polynomial,
+               unsigned short crc_bits)
+{
+  unsigned HOST_WIDE_INT msb = HOST_WIDE_INT_1U << (crc_bits - 1);
+  crc = crc << (crc_bits - 8);
+  for (short i = 8; i > 0; --i)
+    {
+      if (crc & msb)
+        crc = (crc << 1) ^ polynomial;
+      else
+        crc <<= 1;
+    }
+  /* Zero out bits in crc beyond the specified number of crc_bits.  */
+  if (crc_bits < sizeof (crc) * CHAR_BIT)
+    crc &= (HOST_WIDE_INT_1U << crc_bits) - 1;
+  return crc;
+}
+
+/* Assemble CRC table with 256 elements for the given POLYNOM and CRC_BITS with
+   given ID.
+   ID is the identifier of the table, the name of the table is unique,
+   contains CRC size and the polynomial.
+   POLYNOM is the polynomial used to calculate the CRC table's elements.
+   CRC_BITS is the size of CRC, may be 8, 16, ... .  */
+
+rtx
+assemble_crc_table (tree id, unsigned HOST_WIDE_INT polynom,
+                    unsigned short crc_bits)
+{
+  unsigned table_el_n = 0x100;
+  tree ar = build_array_type (make_unsigned_type (crc_bits),
+                              build_index_type (size_int (table_el_n - 1)));
+  tree decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, id, ar);
+  SET_DECL_ASSEMBLER_NAME (decl, id);
+  DECL_ARTIFICIAL (decl) = 1;
+  rtx tab = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (id));
+  TREE_ASM_WRITTEN (decl) = 0;
+
+  /* Initialize the table.  */
+  vec<tree, va_gc> *initial_values;
+  vec_alloc (initial_values, table_el_n);
+  for (size_t i = 0; i < table_el_n; ++i)
+    {
+      unsigned HOST_WIDE_INT crc = calculate_crc (i, polynom, crc_bits);
+      tree element = build_int_cstu (make_unsigned_type (crc_bits), crc);
+      vec_safe_push (initial_values, element);
+    }
+  DECL_INITIAL (decl) = build_constructor_from_vec (ar, initial_values);
+
+  TREE_READONLY (decl) = 1;
+  TREE_STATIC (decl) = 1;
+
+  if (TREE_PUBLIC (id))
+    {
+      TREE_PUBLIC (decl) = 1;
+      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
+    }
+
+  mark_decl_referenced (decl);
+  varpool_node::finalize_decl (decl);
+
+  return tab;
+}
+
+/* Generate CRC lookup table by calculating CRC for all possible
+   8-bit data values.  The table is stored with a specific name in the read-only
+   static data section.
+   POLYNOM is the polynomial used to calculate the CRC table's elements.
+   CRC_BITS is the size of CRC, may be 8, 16, ... .  */
+
+rtx
+generate_crc_table (unsigned HOST_WIDE_INT polynom, unsigned short crc_bits)
+{
+  gcc_assert (crc_bits <= 64);
+
+  /* Buf size - 24 letters + 6 '_'
+     + 20 numbers (2 for crc bit size + 2 for 0x + 16 for 64-bit polynomial)
+     + 1 for \0.  */
+  char buf[51];
+  sprintf (buf, "crc_table_for_crc_%u_polynomial_" HOST_WIDE_INT_PRINT_HEX,
+           crc_bits, polynom);
+
+  tree id = maybe_get_identifier (buf);
+  if (id)
+    return gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (id));
+
+  id = get_identifier (buf);
+  return assemble_crc_table (id, polynom, crc_bits);
+}
+
+/* Generate table-based CRC code for the given CRC, INPUT_DATA and the
+   POLYNOMIAL (without leading 1).
+
+   First, using POLYNOMIAL's value generates CRC table of 256 elements,
+   then generates the assembly for the following code,
+   where crc_bit_size and data_bit_size may be 8, 16, 32, 64, depending on CRC:
+
+     for (int i = 0; i < data_bit_size / 8; i++)
+       crc = (crc << 8) ^ crc_table[(crc >> (crc_bit_size - 8))
+                                    ^ ((data >> (data_bit_size - (i + 1) * 8))
+                                       & 0xFF)];
+
+   So to take values from the table, we need 8-bit data.
+   If input data size is not 8, then first we extract upper 8 bits,
+   then the other 8 bits, and so on.  */
+
+void
+calculate_table_based_CRC (rtx *crc, const rtx &input_data,
+                           const rtx &polynomial,
+                           machine_mode crc_mode, machine_mode data_mode)
+{
+  unsigned short crc_bit_size = GET_MODE_BITSIZE (crc_mode).to_constant ();
+  unsigned short data_size = GET_MODE_SIZE (data_mode).to_constant ();
+
+  rtx tab = generate_crc_table (UINTVAL (polynomial), crc_bit_size);
+
+  for (unsigned short i = 0; i < data_size; i++)
+    {
+      /* crc >> (crc_bit_size - 8).  */
+      rtx op1 = expand_shift (RSHIFT_EXPR, word_mode, *crc, crc_bit_size - 8,
+                              NULL_RTX, 1);
+
+      /* data >> (8 * (GET_MODE_SIZE (data_mode).to_constant () - i - 1)).  */
+      unsigned range_8 = 8 * (data_size - i - 1);
+      rtx data = expand_shift (RSHIFT_EXPR, word_mode, input_data, range_8,
+                               NULL_RTX, 1);
+
+      /* (data >> (8 * (GET_MODE_SIZE (data_mode)
+                        .to_constant () - i - 1))) & 0xFF, in word_mode.  */
+      rtx data_final = expand_and (word_mode, data,
+                                   gen_int_mode (255, word_mode), NULL_RTX);
+
+      /* (crc >> (crc_bit_size - 8)) ^ data_8bit.  */
+      rtx in = expand_binop (Pmode, xor_optab, op1, data_final,
+                             NULL_RTX, 1, OPTAB_WIDEN);
+
+      /* ((crc >> (crc_bit_size - 8)) ^ data_8bit) & 0xFF.  */
+      rtx index = expand_and (Pmode, in, gen_int_mode (255, word_mode),
+                              NULL_RTX);
+      int log_crc_size = exact_log2 (GET_MODE_SIZE (crc_mode).to_constant ());
+      index = expand_shift (LSHIFT_EXPR, Pmode, index,
+                            log_crc_size, NULL_RTX, 0);
+
+      index = expand_binop (Pmode, add_optab, index, tab, NULL_RTX,
+                            0, OPTAB_DIRECT);
+
+      /* crc_table[(crc >> (crc_bit_size - 8)) ^ data_8bit]  */
+      rtx tab_el = validize_mem (gen_rtx_MEM (crc_mode, index));
+
+      /* (crc << 8) if CRC is larger than 8, otherwise crc = 0.  */
+      rtx high = NULL_RTX;
+      if (crc_bit_size != 8)
+        {
+          high = expand_shift (LSHIFT_EXPR, word_mode, *crc, 8, NULL_RTX, 0);
+          if (crc_mode != word_mode)
+            {
+              rtx crc_mode_mask = gen_int_mode (GET_MODE_MASK (crc_mode),
+                                                word_mode);
+              high = expand_and (word_mode, high, crc_mode_mask, NULL_RTX);
+            }
+        }
+      else
+        high = gen_int_mode (0, word_mode);
+
+      /* crc = (crc << 8)
+               ^ crc_table[(crc >> (crc_bit_size - 8)) ^ data_8bit];  */
+      *crc = expand_binop (word_mode, xor_optab, tab_el, high, NULL_RTX, 1,
+                           OPTAB_WIDEN);
+    }
+}
+
+/* Converts and moves a CRC value to a target register.
+
+   CRC_MODE is the mode (data type) of the CRC value.
+   CRC points to the computed CRC value, held in word_mode.
+   OP0 is the target register.  */
+
+void
+emit_crc (machine_mode crc_mode, rtx* crc, rtx* op0)
+{
+  if (GET_MODE_BITSIZE (crc_mode).to_constant () == 32
+      && GET_MODE_BITSIZE (word_mode) == 64)
+    {
+      rtx a_low = gen_lowpart_SUBREG (crc_mode, *crc);
+      *crc = gen_rtx_SIGN_EXTEND (word_mode, a_low);
+    }
+  rtx tgt = *op0;
+  if (word_mode != crc_mode)
+    tgt = simplify_gen_subreg (word_mode, *op0, crc_mode, 0);
+  emit_move_insn (tgt, *crc);
+}
+
+/* Generate table-based CRC code for the given CRC, INPUT_DATA and the
+   POLYNOMIAL (without leading 1).
+
+   CRC is OP1, data is OP2 and the polynomial is OP3.
+   This must generate a CRC table and an assembly for the following code,
+   where crc_bit_size and data_bit_size may be 8, 16, 32, 64:
+   uint_crc_bit_size_t
+   crc_crc_bit_size (uint_crc_bit_size_t crc_init,
+                     uint_data_bit_size_t data, size_t size)
+   {
+     uint_crc_bit_size_t crc = crc_init;
+     for (int i = 0; i < data_bit_size / 8; i++)
+       crc = (crc << 8) ^ crc_table[(crc >> (crc_bit_size - 8))
+                                    ^ ((data >> (data_bit_size - (i + 1) * 8))
+                                       & 0xFF)];
+     return crc;
+   }  */
+
+void
+expand_crc_table_based (rtx op0, rtx op1, rtx op2, rtx op3,
+                        machine_mode data_mode)
+{
+  gcc_assert (!CONST_INT_P (op0));
+  gcc_assert (CONST_INT_P (op3));
+  machine_mode crc_mode = GET_MODE (op0);
+  rtx crc = gen_reg_rtx (word_mode);
+  convert_move (crc, op1, 0);
+  calculate_table_based_CRC (&crc, op2, op3, crc_mode, data_mode);
+  emit_crc (crc_mode, &crc, &op0);
+}
+
+/* Generate the common operation for reflecting values:
+   *OP = (*OP & AND1_VALUE) << SHIFT_VAL | (*OP & AND2_VALUE) >> SHIFT_VAL;  */
+
+void
+gen_common_operation_to_reflect (rtx *op,
+                                 unsigned HOST_WIDE_INT and1_value,
+                                 unsigned HOST_WIDE_INT and2_value,
+                                 unsigned shift_val)
+{
+  rtx op1 = expand_and (word_mode, *op, gen_int_mode (and1_value, word_mode),
+                        NULL_RTX);
+  op1 = expand_shift (LSHIFT_EXPR, word_mode, op1, shift_val, op1, 0);
+  rtx op2 = expand_and (word_mode, *op, gen_int_mode (and2_value, word_mode),
+                        NULL_RTX);
+  op2 = expand_shift (RSHIFT_EXPR, word_mode, op2, shift_val, op2, 1);
+  *op = expand_binop (word_mode, ior_optab, op1, op2, *op, 0, OPTAB_DIRECT);
+}
+
+/* Reflect 64-bit value for the 64-bit target.  */
+
+void
+reflect_64_bit_value (rtx *op)
+{
+  gen_common_operation_to_reflect (op, 0x00000000FFFFFFFF,
+                                   0xFFFFFFFF00000000, 32);
+  gen_common_operation_to_reflect (op, 0x0000FFFF0000FFFF,
+                                   0xFFFF0000FFFF0000, 16);
+  gen_common_operation_to_reflect (op, 0x00FF00FF00FF00FF,
+                                   0xFF00FF00FF00FF00, 8);
+  gen_common_operation_to_reflect (op, 0x0F0F0F0F0F0F0F0F,
+                                   0xF0F0F0F0F0F0F0F0, 4);
+  gen_common_operation_to_reflect (op, 0x3333333333333333,
+                                   0xCCCCCCCCCCCCCCCC, 2);
+  gen_common_operation_to_reflect (op, 0x5555555555555555,
+                                   0xAAAAAAAAAAAAAAAA, 1);
+}
+
+/* Reflect 32-bit value for the 32-bit target.  */
+
+void
+reflect_32_bit_value (rtx *op)
+{
+  gen_common_operation_to_reflect (op, 0x0000FFFF, 0xFFFF0000, 16);
+  gen_common_operation_to_reflect (op, 0x00FF00FF, 0xFF00FF00, 8);
+  gen_common_operation_to_reflect (op, 0x0F0F0F0F, 0xF0F0F0F0, 4);
+  gen_common_operation_to_reflect (op, 0x33333333, 0xCCCCCCCC, 2);
+  gen_common_operation_to_reflect (op, 0x55555555, 0xAAAAAAAA, 1);
+}
+
+/* Reflect 16-bit value for the 16-bit target.  */
+
+void
+reflect_16_bit_value (rtx *op)
+{
+  gen_common_operation_to_reflect (op, 0x00FF, 0xFF00, 8);
+  gen_common_operation_to_reflect (op, 0x0F0F, 0xF0F0, 4);
+  gen_common_operation_to_reflect (op, 0x3333, 0xCCCC, 2);
+  gen_common_operation_to_reflect (op, 0x5555, 0xAAAA, 1);
+}
+
+/* Reflect 8-bit value for the 8-bit target.  */
+
+void
+reflect_8_bit_value (rtx *op)
+{
+  gen_common_operation_to_reflect (op, 0x0F, 0xF0, 4);
+  gen_common_operation_to_reflect (op, 0x33, 0xCC, 2);
+  gen_common_operation_to_reflect (op, 0x55, 0xAA, 1);
+}
+
+/* Generate instruction sequence
+   which reflects the value of the OP using shift, and, or operations.
+   OP's mode may be less than word_mode.  To get the correct number,
+   after reflecting we shift right the value by SHIFT_VAL.
+   E.g. we have 1111 0001, after reflection (target 16-bit) we will get
+   1000 1111 0000 0000, if we shift-out 8 bits,
+   we will get the desired one: 1000 1111.  */
+
+void
+generate_reflecting_code_standard (rtx *op, int shift_val)
+{
+  gcc_assert (BITS_PER_WORD >= 8 && BITS_PER_WORD <= 64);
+
+  if (BITS_PER_WORD == 64)
+    reflect_64_bit_value (op);
+  else if (BITS_PER_WORD == 32)
+    reflect_32_bit_value (op);
+  else if (BITS_PER_WORD == 16)
+    reflect_16_bit_value (op);
+  else
+    reflect_8_bit_value (op);
+
+  *op = expand_shift (RSHIFT_EXPR, word_mode, *op, shift_val, *op, 1);
+}
+
+/* Generate table-based reversed CRC code for the given CRC, INPUT_DATA and
+   the POLYNOMIAL (without leading 1).
+
+   CRC is OP1, data is OP2 and the polynomial is OP3.
+   This must generate CRC table and assembly for the following code,
+   where crc_bit_size and data_bit_size may be 8, 16, 32, 64:
+   uint_crc_bit_size_t
+   crc_crc_bit_size (uint_crc_bit_size_t crc_init,
+                     uint_data_bit_size_t data, size_t size)
+   {
+     reflect (crc_init);
+     uint_crc_bit_size_t crc = crc_init;
+     reflect (data);
+     for (int i = 0; i < data_bit_size / 8; i++)
+       crc = (crc << 8) ^ crc_table[(crc >> (crc_bit_size - 8))
+                  ^ ((data >> (data_bit_size - (i + 1) * 8)) & 0xFF)];
+     reflect (crc);
+     return crc;
+   }  */
+
+void
+expand_reversed_crc_table_based (rtx op0, rtx op1, rtx op2, rtx op3,
+                                 machine_mode data_mode,
+                                 void (*gen_reflecting_code) (rtx *op,
+                                                              int shift_val))
+{
+  gcc_assert (!CONST_INT_P (op0));
+  gcc_assert (CONST_INT_P (op3));
+  machine_mode crc_mode = GET_MODE (op0);
+
+  unsigned short crc_bit_size = GET_MODE_BITSIZE (crc_mode).to_constant ();
+  unsigned short data_bit_size = GET_MODE_BITSIZE (data_mode).to_constant ();
+  unsigned short word_size = GET_MODE_BITSIZE (word_mode);
+
+  rtx crc = gen_reg_rtx (word_mode);
+  convert_move (crc, op1, 0);
+  gen_reflecting_code (&crc, word_size - crc_bit_size);
+
+  rtx data = gen_reg_rtx (word_mode);
+  convert_move (data, op2, 0);
+  gen_reflecting_code (&data, word_size - data_bit_size);
+
+  calculate_table_based_CRC (&crc, data, op3, crc_mode, data_mode);
+
+  gen_reflecting_code (&crc, word_size - crc_bit_size);
+  emit_crc (crc_mode, &crc, &op0);
+}
diff --git a/gcc/expr.h b/gcc/expr.h
index 75181584108..74634d22777 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -374,4 +374,10 @@ extern rtx expr_size (tree);
 extern bool mem_ref_refers_to_non_mem_p (tree);
 extern bool non_mem_decl_p (tree);
 
+/* Generate table-based CRC.  */
+extern void generate_reflecting_code_standard (rtx *, int);
+extern void expand_crc_table_based (rtx, rtx, rtx, rtx, machine_mode);
+extern void expand_reversed_crc_table_based (rtx, rtx, rtx, rtx, machine_mode,
+                                             void (*) (rtx *, int));
+
 #endif /* GCC_EXPR_H */
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 9c09026793f..598b6fd1816 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -189,6 +189,7 @@ init_internal_fns ()
 #define mask_fold_left_direct { 1, 1, false }
 #define mask_len_fold_left_direct { 1, 1, false }
 #define check_ptrs_direct { 0, 0, false }
+#define crc_direct { 1, -1, true }
 
 const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
@@ -3918,6 +3919,58 @@ expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab,
   expand_fn_using_insn (stmt, icode, 1, nargs);
 }
 
+/* Expand CRC call STMT.  */
+
+static void
+expand_crc_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab)
+{
+  tree lhs = gimple_call_lhs (stmt);
+  tree rhs1 = gimple_call_arg (stmt, 0); // crc
+  tree rhs2 = gimple_call_arg (stmt, 1); // data
+  tree rhs3 = gimple_call_arg (stmt, 2); // polynomial
+
+  tree result_type = TREE_TYPE (lhs);
+  tree data_type = TREE_TYPE (rhs2);
+
+  gcc_assert (TYPE_MODE (result_type) >= TYPE_MODE (data_type));
+  gcc_assert (word_mode >= TYPE_MODE (result_type));
+
+  rtx dest = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  rtx crc = expand_normal (rhs1);
+  rtx data = expand_normal (rhs2);
+  gcc_assert (TREE_CODE (rhs3) == INTEGER_CST);
+  rtx polynomial = gen_rtx_CONST_INT (TYPE_MODE (result_type),
+                                      TREE_INT_CST_LOW (rhs3));
+
+  /* Use target specific expansion if it exists.
+     Otherwise, generate table-based CRC.  */
+  if (direct_internal_fn_supported_p (fn, tree_pair (data_type, result_type),
+                                      OPTIMIZE_FOR_SPEED))
+    {
+      class expand_operand ops[4];
+      create_call_lhs_operand (&ops[0], dest, TYPE_MODE (result_type));
+      create_input_operand (&ops[1], crc, TYPE_MODE (result_type));
+      create_input_operand (&ops[2], data, TYPE_MODE (data_type));
+      create_input_operand (&ops[3], polynomial, TYPE_MODE (result_type));
+      insn_code icode = convert_optab_handler (optab, TYPE_MODE (data_type),
+                                               TYPE_MODE (result_type));
+      expand_insn (icode, 4, ops);
+      assign_call_lhs (lhs, dest, &ops[0]);
+    }
+  else
+    {
+      /* If it's IFN_CRC generate bit-forward CRC.  */
+      if (fn == IFN_CRC)
+        expand_crc_table_based (dest, crc, data, polynomial,
+                                TYPE_MODE (data_type));
+      else
+        /* If it's IFN_CRC_REV generate bit-reversed CRC.  */
+        expand_reversed_crc_table_based (dest, crc, data, polynomial,
+                                         TYPE_MODE (data_type),
+                                         generate_reflecting_code_standard);
+    }
+}
+
 /* Expanders for optabs that can use expand_direct_optab_fn.  */
 
 #define expand_unary_optab_fn(FN, STMT, OPTAB) \
@@ -4054,6 +4107,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_cond_len_unary_optab_supported_p direct_optab_supported_p
 #define direct_cond_len_binary_optab_supported_p direct_optab_supported_p
 #define direct_cond_len_ternary_optab_supported_p direct_optab_supported_p
+#define direct_crc_optab_supported_p convert_optab_supported_p
 #define direct_mask_load_optab_supported_p convert_optab_supported_p
 #define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 25badbb86e5..76585100a63 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -200,6 +200,8 @@ along with GCC; see the file COPYING3.  If not see
                                      cond_len_##UNSIGNED_OPTAB, cond_len_##TYPE)
 #endif
 
+DEF_INTERNAL_OPTAB_FN (CRC, ECF_CONST | ECF_NOTHROW, crc, crc)
+DEF_INTERNAL_OPTAB_FN (CRC_REV, ECF_CONST | ECF_NOTHROW, crc_rev, crc)
 DEF_INTERNAL_OPTAB_FN (MASK_LOAD, ECF_PURE, maskload, mask_load)
 DEF_INTERNAL_OPTAB_FN (LOAD_LANES, ECF_CONST, vec_load_lanes, load_lanes)
 DEF_INTERNAL_OPTAB_FN (MASK_LOAD_LANES, ECF_PURE,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 3f2cb46aff8..bf1aaac90c6 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -78,6 +78,8 @@ OPTAB_CD(smsub_widen_optab, "msub$b$a4")
 OPTAB_CD(umsub_widen_optab, "umsub$b$a4")
 OPTAB_CD(ssmsub_widen_optab, "ssmsub$b$a4")
 OPTAB_CD(usmsub_widen_optab, "usmsub$a$b4")
+OPTAB_CD(crc_optab, "crc$a$b4")
+OPTAB_CD(crc_rev_optab, "crc_rev$a$b4")
 OPTAB_CD(vec_load_lanes_optab, "vec_load_lanes$a$b")
 OPTAB_CD(vec_store_lanes_optab, "vec_store_lanes$a$b")
 OPTAB_CD(vec_mask_load_lanes_optab, "vec_mask_load_lanes$a$b")
-- 
2.25.1

[RFC/RFA,v2,01/12] Implement internal functions for efficient CRC computation

Commit Message

Comments

Patch