Runtime: ARM additional shifting intrinsics

Created on 9 Mar 2020  ·  23 Comments  ·  Source: dotnet/runtime

namespace System.Runtime.Intrinsics.Arm
{
    public abstract class AdvSimd
    {
        /// <summary>
        /// Unsigned Shift Left
        /// For each element result[elem] = value[elem] << (shift[elem] & 0xFF)
        /// Corresponds to vector forms of USHL and VSHL
        /// </summary>
        Vector64<byte> ShiftLogical(Vector64<byte> value, Vector64<sbyte> shift);
        Vector64<sbyte> ShiftLogical(Vector64<sbyte> value, Vector64<sbyte> shift);
        Vector64<short> ShiftLogical(Vector64<short> value, Vector64<short> shift);
        Vector64<ushort> ShiftLogical(Vector64<ushort> value, Vector64<short> shift);
        Vector64<int> ShiftLogical(Vector64<int> value, Vector64<int> shift);
        Vector64<uint> ShiftLogical(Vector64<uint> value, Vector64<int> shift);
        Vector128<byte> ShiftLogical(Vector128<byte> value, Vector128<sbyte> shift);
        Vector128<sbyte> ShiftLogical(Vector128<sbyte> value, Vector128<sbyte> shift);
        Vector128<short> ShiftLogical(Vector128<short> value, Vector128<short> shift);
        Vector128<ushort> ShiftLogical(Vector128<ushort> value, Vector128<short> shift);
        Vector128<int> ShiftLogical(Vector128<int> value, Vector128<int> shift);
        Vector128<uint> ShiftLogical(Vector128<uint> value, Vector128<int> shift);
        Vector128<long> ShiftLogical(Vector128<long> value, Vector128<long> shift);
        Vector128<ulong> ShiftLogical(Vector128<ulong> value, Vector128<long> shift);

        Vector64<long> ShiftLogicalScalar(Vector64<long> value, Vector64<long> shift);
        Vector64<ulong> ShiftLogicalScalar(Vector64<ulong> value, Vector64<long> shift);

        /// <summary>
        /// Unsigned Rounding Shift Left
        /// For each element result[elem] = (value[elem] + (1 << (-(shift[elem] & 0xFF) - 1))) << (shift[elem] & 0xFF)
        /// Corresponds to vector forms of URSHL and VRSHL
        /// </summary>
        Vector64<byte> ShiftLogicalRounded(Vector64<byte> value, Vector64<sbyte> shift);
        Vector64<sbyte> ShiftLogicalRounded(Vector64<sbyte> value, Vector64<sbyte> shift);
        Vector64<short> ShiftLogicalRounded(Vector64<short> value, Vector64<short> shift);
        Vector64<ushort> ShiftLogicalRounded(Vector64<ushort> value, Vector64<short> shift);
        Vector64<int> ShiftLogicalRounded(Vector64<int> value, Vector64<int> shift);
        Vector64<uint> ShiftLogicalRounded(Vector64<uint> value, Vector64<int> shift);
        Vector128<byte> ShiftLogicalRounded(Vector128<byte> value, Vector128<sbyte> shift);
        Vector128<sbyte> ShiftLogicalRounded(Vector128<sbyte> value, Vector128<sbyte> shift);
        Vector128<short> ShiftLogicalRounded(Vector128<short> value, Vector128<short> shift);
        Vector128<ushort> ShiftLogicalRounded(Vector128<ushort> value, Vector128<short> shift);
        Vector128<int> ShiftLogicalRounded(Vector128<int> value, Vector128<int> shift);
        Vector128<uint> ShiftLogicalRounded(Vector128<uint> value, Vector128<int> shift);
        Vector128<long> ShiftLogicalRounded(Vector128<long> value, Vector128<long> shift);
        Vector128<ulong> ShiftLogicalRounded(Vector128<ulong> value, Vector128<long> shift);

        Vector64<long> ShiftLogicalRoundedScalar(Vector64<long> value, Vector64<long> shift);
        Vector64<ulong> ShiftLogicalRoundedScalar(Vector64<ulong> value, Vector64<long> shift);

        /// <summary>
        /// Signed Shift Left
        /// For each element result[elem] = value[elem] << (shift[elem] & 0xFF)
        /// Corresponds to vector forms of SSHL and VSHL
        /// </summary>
        Vector64<sbyte> ShiftArithmetic(Vector64<sbyte> value, Vector64<sbyte> shift);
        Vector64<short> ShiftArithmetic(Vector64<short> value, Vector64<short> shift);
        Vector64<int> ShiftArithmetic(Vector64<int> value, Vector64<int> shift);
        Vector128<sbyte> ShiftArithmetic(Vector128<sbyte> value, Vector128<sbyte> shift);
        Vector128<short> ShiftArithmetic(Vector128<short> value, Vector128<short> shift);
        Vector128<int> ShiftArithmetic(Vector128<int> value, Vector128<int> shift);
        Vector128<long> ShiftArithmetic(Vector128<long> value, Vector128<long> shift);

        Vector64<long> ShiftArithmeticScalar(Vector64<long> value, Vector64<long> shift);

        /// <summary>
        /// Signed Rounding Shift Left
        /// For each element result[elem] = (value[elem] + (1 << (-(shift[elem] & 0xFF) - 1))) << (shift[elem] & 0xFF)
        /// Corresponds to vector forms of SRSHL and VRSHL
        /// </summary>
        Vector64<sbyte> ShiftArithmeticRounded(Vector64<sbyte> value, Vector64<sbyte> shift);
        Vector64<short> ShiftArithmeticRounded(Vector64<short> value, Vector64<short> shift);
        Vector64<int> ShiftArithmeticRounded(Vector64<int> value, Vector64<int> shift);
        Vector128<sbyte> ShiftArithmeticRounded(Vector128<sbyte> value, Vector128<sbyte> shift);
        Vector128<short> ShiftArithmeticRounded(Vector128<short> value, Vector128<short> shift);
        Vector128<int> ShiftArithmeticRounded(Vector128<int> value, Vector128<int> shift);
        Vector128<long> ShiftArithmeticRounded(Vector128<long> value, Vector128<long> shift);

        Vector64<long> ShiftArithmeticRoundedScalar(Vector64<long> value, Vector64<long> shift);

        /// <summary>
        /// Unsigned Saturating Shift Left
        /// For each element result[elem] = value[elem] << (shift[elem] & 0xFF)
        /// Corresponds to vector forms of UQSHL and VQSHL
        /// </summary>
        Vector64<byte> ShiftLogicalSaturate(Vector64<byte> value, Vector64<sbyte> shift);
        Vector64<sbyte> ShiftLogicalSaturate(Vector64<sbyte> value, Vector64<sbyte> shift);
        Vector64<short> ShiftLogicalSaturate(Vector64<short> value, Vector64<short> shift);
        Vector64<ushort> ShiftLogicalSaturate(Vector64<ushort> value, Vector64<short> shift);
        Vector64<int> ShiftLogicalSaturate(Vector64<int> value, Vector64<int> shift);
        Vector64<uint> ShiftLogicalSaturate(Vector64<uint> value, Vector64<int> shift);
        Vector128<byte> ShiftLogicalSaturate(Vector128<byte> value, Vector128<sbyte> shift);
        Vector128<sbyte> ShiftLogicalSaturate(Vector128<sbyte> value, Vector128<sbyte> shift);
        Vector128<short> ShiftLogicalSaturate(Vector128<short> value, Vector128<short> shift);
        Vector128<ushort> ShiftLogicalSaturate(Vector128<ushort> value, Vector128<short> shift);
        Vector128<int> ShiftLogicalSaturate(Vector128<int> value, Vector128<int> shift);
        Vector128<uint> ShiftLogicalSaturate(Vector128<uint> value, Vector128<int> shift);
        Vector128<long> ShiftLogicalSaturate(Vector128<long> value, Vector128<long> shift);
        Vector128<ulong> ShiftLogicalSaturate(Vector128<ulong> value, Vector128<long> shift);

        Vector64<long> ShiftLogicalSaturateScalar(Vector64<long> value, Vector64<long> shift);
        Vector64<ulong> ShiftLogicalSaturateScalar(Vector64<ulong> value, Vector64<long> shift);

        /// <summary>
        /// Unsigned Saturating Rounding Shift Left
        /// For each element result[elem] = (value[elem] + (1 << (-(shift[elem] & 0xFF) - 1))) << (shift[elem] & 0xFF)
        /// Corresponds to vector forms of UQRSHL and VQRSHL
        /// </summary>
        Vector64<byte> ShiftLogicalRoundedSaturate(Vector64<byte> value, Vector64<sbyte> shift);
        Vector64<sbyte> ShiftLogicalRoundedSaturate(Vector64<sbyte> value, Vector64<sbyte> shift);
        Vector64<short> ShiftLogicalRoundedSaturate(Vector64<short> value, Vector64<short> shift);
        Vector64<ushort> ShiftLogicalRoundedSaturate(Vector64<ushort> value, Vector64<short> shift);
        Vector64<int> ShiftLogicalRoundedSaturate(Vector64<int> value, Vector64<int> shift);
        Vector64<uint> ShiftLogicalRoundedSaturate(Vector64<uint> value, Vector64<int> shift);
        Vector128<byte> ShiftLogicalRoundedSaturate(Vector128<byte> value, Vector128<sbyte> shift);
        Vector128<sbyte> ShiftLogicalRoundedSaturate(Vector128<sbyte> value, Vector128<sbyte> shift);
        Vector128<short> ShiftLogicalRoundedSaturate(Vector128<short> value, Vector128<short> shift);
        Vector128<ushort> ShiftLogicalRoundedSaturate(Vector128<ushort> value, Vector128<short> shift);
        Vector128<int> ShiftLogicalRoundedSaturate(Vector128<int> value, Vector128<int> shift);
        Vector128<uint> ShiftLogicalRoundedSaturate(Vector128<uint> value, Vector128<int> shift);
        Vector128<long> ShiftLogicalRoundedSaturate(Vector128<long> value, Vector128<long> shift);
        Vector128<ulong> ShiftLogicalRoundedSaturate(Vector128<ulong> value, Vector128<long> shift);

        Vector64<long> ShiftLogicalRoundedSaturateScalar(Vector64<long> value, Vector64<long> shift);
        Vector64<ulong> ShiftLogicalRoundedSaturateScalar(Vector64<ulong> value, Vector64<long> shift);

        /// <summary>
        /// Signed Saturating Shift Left
        /// For each element result[elem] = value[elem] << (shift[elem] & 0xFF)
        /// Corresponds to vector forms of SQSHL and VQSHL
        /// </summary>
        Vector64<sbyte> ShiftArithmeticSaturate(Vector64<sbyte> value, Vector64<sbyte> shift);
        Vector64<short> ShiftArithmeticSaturate(Vector64<short> value, Vector64<short> shift);
        Vector64<int> ShiftArithmeticSaturate(Vector64<int> value, Vector64<int> shift);
        Vector128<sbyte> ShiftArithmeticSaturate(Vector128<sbyte> value, Vector128<sbyte> shift);
        Vector128<short> ShiftArithmeticSaturate(Vector128<short> value, Vector128<short> shift);
        Vector128<int> ShiftArithmeticSaturate(Vector128<int> value, Vector128<int> shift);
        Vector128<long> ShiftArithmeticSaturate(Vector128<long> value, Vector128<long> shift);

        Vector64<long> ShiftArithmeticSaturateScalar(Vector64<long> value, Vector64<long> shift);

        /// <summary>
        /// Signed Saturating Rounding Shift Left
        /// For each element result[elem] = (value[elem] + (1 << (-(shift[elem] & 0xFF) - 1))) << (shift[elem] & 0xFF)
        /// Corresponds to vector forms of SQRSHL and VQRSHL
        /// </summary>
        Vector64<sbyte> ShiftArithmeticRoundedSaturate(Vector64<sbyte> value, Vector64<sbyte> shift);
        Vector64<short> ShiftArithmeticRoundedSaturate(Vector64<short> value, Vector64<short> shift);
        Vector64<int> ShiftArithmeticRoundedSaturate(Vector64<int> value, Vector64<int> shift);
        Vector128<sbyte> ShiftArithmeticRoundedSaturate(Vector128<sbyte> value, Vector128<sbyte> shift);
        Vector128<short> ShiftArithmeticRoundedSaturate(Vector128<short> value, Vector128<short> shift);
        Vector128<int> ShiftArithmeticRoundedSaturate(Vector128<int> value, Vector128<int> shift);
        Vector128<long> ShiftArithmeticRoundedSaturate(Vector128<long> value, Vector128<long> shift);

        Vector64<long> ShiftArithmeticRoundedSaturateScalar(Vector64<long> value, Vector64<long> shift);

        /// <summary>
        /// Shift Left Immediate
        /// For each element result[elem] = value[elem] << shift
        /// Corresponds to vector forms of SHL and VSHL
        /// </summary>
        Vector64<byte> ShiftLeftLogical(Vector64<byte> value, byte shift);
        Vector64<sbyte> ShiftLeftLogical(Vector64<sbyte> value, byte shift);
        Vector64<short> ShiftLeftLogical(Vector64<short> value, byte shift);
        Vector64<ushort> ShiftLeftLogical(Vector64<ushort> value, byte shift);
        Vector64<int> ShiftLeftLogical(Vector64<int> value, byte shift);
        Vector64<uint> ShiftLeftLogical(Vector64<uint> value, byte shift);
        Vector128<byte> ShiftLeftLogical(Vector128<byte> value, byte shift);
        Vector128<sbyte> ShiftLeftLogical(Vector128<sbyte> value, byte shift);
        Vector128<short> ShiftLeftLogical(Vector128<short> value, byte shift);
        Vector128<ushort> ShiftLeftLogical(Vector128<ushort> value, byte shift);
        Vector128<int> ShiftLeftLogical(Vector128<int> value, byte shift);
        Vector128<uint> ShiftLeftLogical(Vector128<uint> value, byte shift);
        Vector128<long> ShiftLeftLogical(Vector128<long> value, byte shift);
        Vector128<ulong> ShiftLeftLogical(Vector128<ulong> value, byte shift);

        Vector64<long> ShiftLeftLogicalScalar(Vector64<long> value, byte shift);
        Vector64<ulong> ShiftLeftLogicalScalar(Vector64<ulong> value, byte shift);

        /// <summary>
        /// Unsigned Shift Right Immediate
        /// For each element result[elem] = value[elem] >> shift
        /// Corresponds to vector forms of USHR and VSHR
        /// </summary>
        Vector64<byte> ShiftRightLogical(Vector64<byte> value, byte shift);
        Vector64<sbyte> ShiftRightLogical(Vector64<sbyte> value, byte shift);
        Vector64<short> ShiftRightLogical(Vector64<short> value, byte shift);
        Vector64<ushort> ShiftRightLogical(Vector64<ushort> value, byte shift);
        Vector64<int> ShiftRightLogical(Vector64<int> value, byte shift);
        Vector64<uint> ShiftRightLogical(Vector64<uint> value, byte shift);
        Vector128<byte> ShiftRightLogical(Vector128<byte> value, byte shift);
        Vector128<sbyte> ShiftRightLogical(Vector128<sbyte> value, byte shift);
        Vector128<short> ShiftRightLogical(Vector128<short> value, byte shift);
        Vector128<ushort> ShiftRightLogical(Vector128<ushort> value, byte shift);
        Vector128<int> ShiftRightLogical(Vector128<int> value, byte shift);
        Vector128<uint> ShiftRightLogical(Vector128<uint> value, byte shift);
        Vector128<long> ShiftRightLogical(Vector128<long> value, byte shift);
        Vector128<ulong> ShiftRightLogical(Vector128<ulong> value, byte shift);

        Vector64<long> ShiftRightLogicalScalar(Vector64<long> value, byte shift);
        Vector64<ulong> ShiftRightLogicalScalar(Vector64<ulong> value, byte shift);

        /// <summary>
        /// Unsigned Rounding Shift Right Immediate
        /// For each element result[elem] = (value[elem] + (1 << (shift - 1))) >> shift
        /// Corresponds to vector forms of URSHR and VRSHR
        /// </summary>
        Vector64<byte> ShiftRightLogicalRounded(Vector64<byte> value, byte shift);
        Vector64<sbyte> ShiftRightLogicalRounded(Vector64<sbyte> value, byte shift);
        Vector64<short> ShiftRightLogicalRounded(Vector64<short> value, byte shift);
        Vector64<ushort> ShiftRightLogicalRounded(Vector64<ushort> value, byte shift);
        Vector64<int> ShiftRightLogicalRounded(Vector64<int> value, byte shift);
        Vector64<uint> ShiftRightLogicalRounded(Vector64<uint> value, byte shift);
        Vector128<byte> ShiftRightLogicalRounded(Vector128<byte> value, byte shift);
        Vector128<sbyte> ShiftRightLogicalRounded(Vector128<sbyte> value, byte shift);
        Vector128<short> ShiftRightLogicalRounded(Vector128<short> value, byte shift);
        Vector128<ushort> ShiftRightLogicalRounded(Vector128<ushort> value, byte shift);
        Vector128<int> ShiftRightLogicalRounded(Vector128<int> value, byte shift);
        Vector128<uint> ShiftRightLogicalRounded(Vector128<uint> value, byte shift);
        Vector128<long> ShiftRightLogicalRounded(Vector128<long> value, byte shift);
        Vector128<ulong> ShiftRightLogicalRounded(Vector128<ulong> value, byte shift);

        Vector64<long> ShiftRightLogicalRoundedScalar(Vector64<long> value, byte shift);
        Vector64<ulong> ShiftRightLogicalRoundedScalar(Vector64<ulong> value, byte shift);

        /// <summary>
        /// Signed Shift Right Immediate
        /// For each element result[elem] = value[elem] >> shift
        /// Corresponds to vector forms of SSHR and VSHR
        /// </summary>
        Vector64<sbyte> ShiftRightArithmetic(Vector64<sbyte> value, byte shift);
        Vector64<short> ShiftRightArithmetic(Vector64<short> value, byte shift);
        Vector64<int> ShiftRightArithmetic(Vector64<int> value, byte shift);
        Vector128<sbyte> ShiftRightArithmetic(Vector128<sbyte> value, byte shift);
        Vector128<short> ShiftRightArithmetic(Vector128<short> value, byte shift);
        Vector128<int> ShiftRightArithmetic(Vector128<int> value, byte shift);
        Vector128<long> ShiftRightArithmetic(Vector128<long> value, byte shift);

        Vector64<long> ShiftRightArithmeticScalar(Vector64<long> value, byte shift);

        /// <summary>
        /// Signed Rounding Shift Right Immediate
        /// For each element result[elem] = (value[elem] + (1 << (shift - 1))) >> shift
        /// Corresponds to vector forms of SRSHR and VRSHR
        /// </summary>
        Vector64<sbyte> ShiftRightArithmeticRounded(Vector64<sbyte> value, byte shift);
        Vector64<short> ShiftRightArithmeticRounded(Vector64<short> value, byte shift);
        Vector64<int> ShiftRightArithmeticRounded(Vector64<int> value, byte shift);
        Vector128<sbyte> ShiftRightArithmeticRounded(Vector128<sbyte> value, byte shift);
        Vector128<short> ShiftRightArithmeticRounded(Vector128<short> value, byte shift);
        Vector128<int> ShiftRightArithmeticRounded(Vector128<int> value, byte shift);
        Vector128<long> ShiftRightArithmeticRounded(Vector128<long> value, byte shift);

        Vector64<long> ShiftRightArithmeticRoundedScalar(Vector64<long> value, byte shift);

        /// <summary>
        /// Unsigned Shift Right and Accumulate
        /// For each element result[elem] = addend[elem] + (value[elem] >> shift)
        /// Corresponds to vector forms of USRA and VSRA
        /// </summary>
        Vector64<byte> ShiftRightLogicalAdd(Vector64<byte> addend, Vector64<byte> value, byte shift);
        Vector64<sbyte> ShiftRightLogicalAdd(Vector64<sbyte> addend, Vector64<sbyte> value, byte shift);
        Vector64<short> ShiftRightLogicalAdd(Vector64<short> addend, Vector64<short> value, byte shift);
        Vector64<ushort> ShiftRightLogicalAdd(Vector64<ushort> addend, Vector64<ushort> value, byte shift);
        Vector64<int> ShiftRightLogicalAdd(Vector64<int> addend, Vector64<int> value, byte shift);
        Vector64<uint> ShiftRightLogicalAdd(Vector64<uint> addend, Vector64<uint> value, byte shift);
        Vector128<byte> ShiftRightLogicalAdd(Vector128<byte> addend, Vector128<byte> value, byte shift);
        Vector128<sbyte> ShiftRightLogicalAdd(Vector128<sbyte> addend, Vector128<sbyte> value, byte shift);
        Vector128<short> ShiftRightLogicalAdd(Vector128<short> addend, Vector128<short> value, byte shift);
        Vector128<ushort> ShiftRightLogicalAdd(Vector128<ushort> addend, Vector128<ushort> value, byte shift);
        Vector128<int> ShiftRightLogicalAdd(Vector128<int> addend, Vector128<int> value, byte shift);
        Vector128<uint> ShiftRightLogicalAdd(Vector128<uint> addend, Vector128<uint> value, byte shift);
        Vector128<long> ShiftRightLogicalAdd(Vector128<long> addend, Vector128<long> value, byte shift);
        Vector128<ulong> ShiftRightLogicalAdd(Vector128<ulong> addend, Vector128<ulong> value, byte shift);

        Vector64<long> ShiftRightLogicalAddScalar(Vector64<long> addend, Vector64<long> value, byte shift);
        Vector64<ulong> ShiftRightLogicalAddScalar(Vector64<ulong> addend, Vector64<ulong> value, byte shift);

        /// <summary>
        /// Signed Shift Right and Accumulate
        /// For each element result[elem] = addend[elem] + (value[elem] >> shift)
        /// Corresponds to vector forms of SSRA and VSRA
        /// </summary>
        Vector64<sbyte> ShiftRightArithmeticAdd(Vector64<sbyte> addend, Vector64<sbyte> value, byte shift);
        Vector64<short> ShiftRightArithmeticAdd(Vector64<short> addend, Vector64<short> value, byte shift);
        Vector64<int> ShiftRightArithmeticAdd(Vector64<int> addend, Vector64<int> value, byte shift);
        Vector128<sbyte> ShiftRightArithmeticAdd(Vector128<sbyte> addend, Vector128<sbyte> value, byte shift);
        Vector128<short> ShiftRightArithmeticAdd(Vector128<short> addend, Vector128<short> value, byte shift);
        Vector128<int> ShiftRightArithmeticAdd(Vector128<int> addend, Vector128<int> value, byte shift);
        Vector128<long> ShiftRightArithmeticAdd(Vector128<long> addend, Vector128<long> value, byte shift);

        Vector64<long> ShiftRightArithmeticAddScalar(Vector64<long> addend, Vector64<long> value, byte shift);

        /// <summary>
        /// Unsigned Rounding Shift Right and Accumulate
        /// For each element result[elem] = addend[elem] + ((value[elem] + (1 << (shift - 1))) >> shift)
        /// Corresponds to vector forms of URSRA and VRSRA
        /// </summary>
        Vector64<byte> ShiftRightLogicalAddRounded(Vector64<byte> addend, Vector64<byte> value, byte shift);
        Vector64<sbyte> ShiftRightLogicalAddRounded(Vector64<sbyte> addend, Vector64<sbyte> value, byte shift);
        Vector64<short> ShiftRightLogicalAddRounded(Vector64<short> addend, Vector64<short> value, byte shift);
        Vector64<ushort> ShiftRightLogicalAddRounded(Vector64<ushort> addend, Vector64<ushort> value, byte shift);
        Vector64<int> ShiftRightLogicalAddRounded(Vector64<int> addend, Vector64<int> value, byte shift);
        Vector64<uint> ShiftRightLogicalAddRounded(Vector64<uint> addend, Vector64<uint> value, byte shift);
        Vector128<byte> ShiftRightLogicalAddRounded(Vector128<byte> addend, Vector128<byte> value, byte shift);
        Vector128<sbyte> ShiftRightLogicalAddRounded(Vector128<sbyte> addend, Vector128<sbyte> value, byte shift);
        Vector128<short> ShiftRightLogicalAddRounded(Vector128<short> addend, Vector128<short> value, byte shift);
        Vector128<ushort> ShiftRightLogicalAddRounded(Vector128<ushort> addend, Vector128<ushort> value, byte shift);
        Vector128<int> ShiftRightLogicalAddRounded(Vector128<int> addend, Vector128<int> value, byte shift);
        Vector128<uint> ShiftRightLogicalAddRounded(Vector128<uint> addend, Vector128<uint> value, byte shift);
        Vector128<long> ShiftRightLogicalAddRounded(Vector128<long> addend, Vector128<long> value, byte shift);
        Vector128<ulong> ShiftRightLogicalAddRounded(Vector128<ulong> addend, Vector128<ulong> value, byte shift);

        Vector64<long> ShiftRightLogicalAddRoundedScalar(Vector64<long> addend, Vector64<long> value, byte shift);
        Vector64<ulong> ShiftRightLogicalAddRoundedScalar(Vector64<ulong> addend, Vector64<ulong> value, byte shift);

        /// <summary>
        /// Signed Rounding Shift Right and Accumulate
        /// For each element result[elem] = addend[elem] + ((value[elem] + (1 << (shift - 1))) >> shift)
        /// Corresponds to vector forms of SRSRA and VRSRA
        /// </summary>
        Vector64<sbyte> ShiftRightArithmeticAddRounded(Vector64<sbyte> addend, Vector64<sbyte> value, byte shift);
        Vector64<short> ShiftRightArithmeticAddRounded(Vector64<short> addend, Vector64<short> value, byte shift);
        Vector64<int> ShiftRightArithmeticAddRounded(Vector64<int> addend, Vector64<int> value, byte shift);
        Vector128<sbyte> ShiftRightArithmeticAddRounded(Vector128<sbyte> addend, Vector128<sbyte> value, byte shift);
        Vector128<short> ShiftRightArithmeticAddRounded(Vector128<short> addend, Vector128<short> value, byte shift);
        Vector128<int> ShiftRightArithmeticAddRounded(Vector128<int> addend, Vector128<int> value, byte shift);
        Vector128<long> ShiftRightArithmeticAddRounded(Vector128<long> addend, Vector128<long> value, byte shift);

        Vector64<long> ShiftRightArithmeticAddRoundedScalar(Vector64<long> addend, Vector64<long> value, byte shift);

        /// <summary>
        /// Signed Saturating Shift Left and Unsigned Saturating Shift Left
        /// For each element result[elem] = value[elem] << shift
        /// Corresponds to vector forms of SQSHL, UQSHL, and VQSHL
        /// </summary>
        Vector64<byte> ShiftLeftLogicalSaturate(Vector64<byte> value, byte shift);
        Vector64<sbyte> ShiftLeftLogicalSaturate(Vector64<sbyte> value, byte shift);
        Vector64<short> ShiftLeftLogicalSaturate(Vector64<short> value, byte shift);
        Vector64<ushort> ShiftLeftLogicalSaturate(Vector64<ushort> value, byte shift);
        Vector64<int> ShiftLeftLogicalSaturate(Vector64<int> value, byte shift);
        Vector64<uint> ShiftLeftLogicalSaturate(Vector64<uint> value, byte shift);
        Vector128<byte> ShiftLeftLogicalSaturate(Vector128<byte> value, byte shift);
        Vector128<sbyte> ShiftLeftLogicalSaturate(Vector128<sbyte> value, byte shift);
        Vector128<short> ShiftLeftLogicalSaturate(Vector128<short> value, byte shift);
        Vector128<ushort> ShiftLeftLogicalSaturate(Vector128<ushort> value, byte shift);
        Vector128<int> ShiftLeftLogicalSaturate(Vector128<int> value, byte shift);
        Vector128<uint> ShiftLeftLogicalSaturate(Vector128<uint> value, byte shift);
        Vector128<long> ShiftLeftLogicalSaturate(Vector128<long> value, byte shift);
        Vector128<ulong> ShiftLeftLogicalSaturate(Vector128<ulong> value, byte shift);

        Vector64<long> ShiftLeftLogicalSaturateScalar(Vector64<long> value, byte shift);
        Vector64<ulong> ShiftLeftLogicalSaturateScalar(Vector64<ulong> value, byte shift);

        /// <summary>
        /// Signed Saturating Shift Left Unsigned
        /// For each element result[elem] = value[elem] << shift
        /// Corresponds to vector forms of SQSHLU and VQSHLU
        /// </summary>
        Vector64<byte> ShiftLeftLogicalSaturateUnsigned(Vector64<sbyte> value, byte shift);
        Vector64<ushort> ShiftLeftLogicalSaturateUnsigned(Vector64<short> value, byte shift);
        Vector64<uint> ShiftLeftLogicalSaturateUnsigned(Vector64<int> value, byte shift);
        Vector128<byte> ShiftLeftLogicalSaturateUnsigned(Vector128<sbyte> value, byte shift);
        Vector128<ushort> ShiftLeftLogicalSaturateUnsigned(Vector128<short> value, byte shift);
        Vector128<uint> ShiftLeftLogicalSaturateUnsigned(Vector128<int> value, byte shift);
        Vector128<ulong> ShiftLeftLogicalSaturateUnsigned(Vector128<long> value, byte shift);

        Vector64<ulong> ShiftLeftLogicalSaturateUnsignedScalar(Vector64<long> value, byte shift);

        /// <summary>
        /// Shift Right Narrow Immediate
        /// For each element result[elem] = value[elem] >> shift
        /// Corresponds to vector forms of SHRN and VSHRN
        /// </summary>
        Vector64<sbyte> ShiftRightLogicalAndNarrowLower(Vector128<short> value, byte shift);
        Vector64<byte> ShiftRightLogicalAndNarrowLower(Vector128<ushort> value, byte shift);
        Vector64<short> ShiftRightLogicalAndNarrowLower(Vector128<int> value, byte shift);
        Vector64<ushort> ShiftRightLogicalAndNarrowLower(Vector128<uint> value, byte shift);
        Vector64<int> ShiftRightLogicalAndNarrowLower(Vector128<long> value, byte shift);
        Vector64<uint> ShiftRightLogicalAndNarrowLower(Vector128<ulong> value, byte shift);

        /// <summary>
        /// Rounding Shift Right Narrow Immediate
        /// For each element result[elem] = (value[elem] + (1 << (shift - 1))) >> shift
        /// Corresponds to vector forms of RSHRN and VRSHRN
        /// </summary>
        Vector64<sbyte> ShiftRightLogicalAndNarrowRoundedLower(Vector128<short> value, byte shift);
        Vector64<byte> ShiftRightLogicalAndNarrowRoundedLower(Vector128<ushort> value, byte shift);
        Vector64<short> ShiftRightLogicalAndNarrowRoundedLower(Vector128<int> value, byte shift);
        Vector64<ushort> ShiftRightLogicalAndNarrowRoundedLower(Vector128<uint> value, byte shift);
        Vector64<int> ShiftRightLogicalAndNarrowRoundedLower(Vector128<long> value, byte shift);
        Vector64<uint> ShiftRightLogicalAndNarrowRoundedLower(Vector128<ulong> value, byte shift);

        /// <summary>
        /// Shift Left Long
        /// For each element result[elem] = value[elem] << shift
        /// Corresponds to vector forms of SHLL and VSHLL
        /// </summary>
        Vector128<short> ShiftLeftLogicalAndWidenLower(Vector64<sbyte> value, byte shift);
        Vector128<ushort> ShiftLeftLogicalAndWidenLower(Vector64<byte> value, byte shift);
        Vector128<int> ShiftLeftLogicalAndWidenLower(Vector64<short> value, byte shift);
        Vector128<uint> ShiftLeftLogicalAndWidenLower(Vector64<ushort> value, byte shift);
        Vector128<long> ShiftLeftLogicalAndWidenLower(Vector64<int> value, byte shift);
        Vector128<ulong> ShiftLeftLogicalAndWidenLower(Vector64<uint> value, byte shift);

        /// <summary>
        /// Unsigned Saturating Shift Right Narrow Immediate
        /// For each element result[elem] = value[elem] >> shift
        /// Corresponds to vector forms of UQSHRN and VQSHRUN
        /// NOTE(review): the A32 counterpart of UQSHRN is presumably VQSHRN; VQSHRUN looks like the
        /// signed-to-unsigned form - confirm against the Arm instruction set reference
        /// </summary>
        Vector64<sbyte> ShiftRightLogicalAndNarrowSaturateLower(Vector128<short> value, byte shift);
        Vector64<byte> ShiftRightLogicalAndNarrowSaturateLower(Vector128<ushort> value, byte shift);
        Vector64<short> ShiftRightLogicalAndNarrowSaturateLower(Vector128<int> value, byte shift);
        Vector64<ushort> ShiftRightLogicalAndNarrowSaturateLower(Vector128<uint> value, byte shift);
        Vector64<int> ShiftRightLogicalAndNarrowSaturateLower(Vector128<long> value, byte shift);
        Vector64<uint> ShiftRightLogicalAndNarrowSaturateLower(Vector128<ulong> value, byte shift);

        /// <summary>
        /// Unsigned Saturating Rounded Shift Right Narrow Immediate
        /// For each element result[elem] = (value[elem] + (1 << (shift - 1))) >> shift
        /// Corresponds to vector forms of UQRSHRN and VQRSHRUN
        /// NOTE(review): the A32 counterpart of UQRSHRN is presumably VQRSHRN; VQRSHRUN looks like the
        /// signed-to-unsigned form - confirm against the Arm instruction set reference
        /// </summary>
        Vector64<sbyte> ShiftRightLogicalAndNarrowRoundedSaturateLower(Vector128<short> value, byte shift);
        Vector64<byte> ShiftRightLogicalAndNarrowRoundedSaturateLower(Vector128<ushort> value, byte shift);
        Vector64<short> ShiftRightLogicalAndNarrowRoundedSaturateLower(Vector128<int> value, byte shift);
        Vector64<ushort> ShiftRightLogicalAndNarrowRoundedSaturateLower(Vector128<uint> value, byte shift);
        Vector64<int> ShiftRightLogicalAndNarrowRoundedSaturateLower(Vector128<long> value, byte shift);
        Vector64<uint> ShiftRightLogicalAndNarrowRoundedSaturateLower(Vector128<ulong> value, byte shift);

        /// <summary>
        /// Signed Saturating Shift Right Narrow Immediate
        /// For each element result[elem] = value[elem] >> shift
        /// Corresponds to vector forms of SQSHRN and VQSHRN
        /// </summary>
        Vector64<sbyte> ShiftRightArithmeticAndNarrowSaturateLower(Vector128<short> value, byte shift);
        Vector64<short> ShiftRightArithmeticAndNarrowSaturateLower(Vector128<int> value, byte shift);
        Vector64<int> ShiftRightArithmeticAndNarrowSaturateLower(Vector128<long> value, byte shift);

        /// <summary>
        /// Signed Saturating Rounded Shift Right Narrow Immediate
        /// For each element result[elem] = (value[elem] + (1 << (shift - 1))) >> shift
        /// Corresponds to vector forms of SQRSHRN and VQRSHRN
        /// </summary>
        Vector64<sbyte> ShiftRightArithmeticAndNarrowRoundedSaturateLower(Vector128<short> value, byte shift);
        Vector64<short> ShiftRightArithmeticAndNarrowRoundedSaturateLower(Vector128<int> value, byte shift);
        Vector64<int> ShiftRightArithmeticAndNarrowRoundedSaturateLower(Vector128<long> value, byte shift);

        public abstract class Arm64
        {
            /// <summary>
            /// Signed Saturating Rounding Shift Left
            /// For each element result[elem] = (value[elem] + (1 << (-(shift[elem] & 0xFF) - 1))) << (shift[elem] & 0xFF)
            /// Corresponds to vector forms of SQRSHL
            /// </summary>
            Vector64<sbyte> ShiftArithmeticRoundedSaturateScalar(Vector64<sbyte> value, Vector64<sbyte> shift);
            Vector64<short> ShiftArithmeticRoundedSaturateScalar(Vector64<short> value, Vector64<short> shift);
            Vector64<int> ShiftArithmeticRoundedSaturateScalar(Vector64<int> value, Vector64<int> shift);

            /// <summary>
            /// Signed Saturating Shift Left
            /// For each element result[elem] = value[elem] << (ShiftLogical[elem] & 0xFF)
            /// Corresponds to vector forms of SQSHL
            /// </summary>
            Vector64<sbyte> ShiftArithmeticSaturateScalar(Vector64<sbyte> value, Vector64<sbyte> shift);
            Vector64<short> ShiftArithmeticSaturateScalar(Vector64<short> value, Vector64<short> shift);
            Vector64<int> ShiftArithmeticSaturateScalar(Vector64<int> value, Vector64<int> shift);

            /// <summary>
            /// Signed Rounding Shift Left
            /// For each element result[elem] = (value[elem] + (1 << (-(shift[elem] & 0xFF) - 1))) << (shift[elem] & 0xFF)
            /// Corresponds to vector forms of SRSHL
            /// </summary>
            Vector64<sbyte> ShiftArithmeticRoundedScalar(Vector64<sbyte> value, Vector64<sbyte> shift);
            Vector64<short> ShiftArithmeticRoundedScalar(Vector64<short> value, Vector64<short> shift);
            Vector64<int> ShiftArithmeticRoundedScalar(Vector64<int> value, Vector64<int> shift);

            /// <summary>
            /// Signed Shift Left
            /// For each element result[elem] = value[elem] << (shift[elem] & 0xFF)
            /// Corresponds to vector forms of SSHL
            /// </summary>
            Vector64<sbyte> ShiftArithmeticScalar(Vector64<sbyte> value, Vector64<sbyte> shift);
            Vector64<short> ShiftArithmeticScalar(Vector64<short> value, Vector64<short> shift);
            Vector64<int> ShiftArithmeticScalar(Vector64<int> value, Vector64<int> shift);

            /// <summary>
            /// Unsigned Saturating Rounding Shift Left
            /// For each element result[elem] = (value[elem] + (1 << (-(ShiftLogical[elem] & 0xFF) - 1))) << (ShiftLogical[elem] & 0xFF)
            /// Corresponds to vector forms of UQRSHL and VQRSHL
            /// </summary>
            Vector64<byte> ShiftLogicalRoundedSaturateScalar(Vector64<byte> value, Vector64<sbyte> shift);
            Vector64<sbyte> ShiftLogicalRoundedSaturateScalar(Vector64<sbyte> value, Vector64<sbyte> shift);
            Vector64<short> ShiftLogicalRoundedSaturateScalar(Vector64<short> value, Vector64<short> shift);
            Vector64<ushort> ShiftLogicalRoundedSaturateScalar(Vector64<ushort> value, Vector64<short> shift);
            Vector64<int> ShiftLogicalRoundedSaturateScalar(Vector64<int> value, Vector64<int> shift);
            Vector64<uint> ShiftLogicalRoundedSaturateScalar(Vector64<uint> value, Vector64<int> shift);

            /// <summary>
            /// Unsigned Saturating Shift Left
            /// For each element result[elem] = value[elem] << (ShiftLogical[elem] & 0xFF)
            /// Corresponds to vector forms of UQSHL and VQSHL
            /// </summary>
            Vector64<byte> ShiftLogicalSaturateScalar(Vector64<byte> value, Vector64<sbyte> shift);
            Vector64<sbyte> ShiftLogicalSaturateScalar(Vector64<sbyte> value, Vector64<sbyte> shift);
            Vector64<short> ShiftLogicalSaturateScalar(Vector64<short> value, Vector64<short> shift);
            Vector64<ushort> ShiftLogicalSaturateScalar(Vector64<ushort> value, Vector64<short> shift);
            Vector64<int> ShiftLogicalSaturateScalar(Vector64<int> value, Vector64<int> shift);
            Vector64<uint> ShiftLogicalSaturateScalar(Vector64<uint> value, Vector64<int> shift);

            /// <summary>
            /// Unsigned Rounding Shift Left
            /// For each element result[elem] = (value[elem] + (1 << (-(shift[elem] & 0xFF) - 1))) << (shift[elem] & 0xFF)
            /// Corresponds to vector forms of URSHL and VRSHL
            /// </summary>
            Vector64<byte> ShiftLogicalRoundedScalar(Vector64<byte> value, Vector64<sbyte> shift);
            Vector64<sbyte> ShiftLogicalRoundedScalar(Vector64<sbyte> value, Vector64<sbyte> shift);
            Vector64<short> ShiftLogicalRoundedScalar(Vector64<short> value, Vector64<short> shift);
            Vector64<ushort> ShiftLogicalRoundedScalar(Vector64<ushort> value, Vector64<short> shift);
            Vector64<int> ShiftLogicalRoundedScalar(Vector64<int> value, Vector64<int> shift);
            Vector64<uint> ShiftLogicalRoundedScalar(Vector64<uint> value, Vector64<int> shift);


            /// <summary>
            /// Unsigned Shift Left
            /// For each element result[elem] = value[elem] << (shift[elem] & 0xFF)
            /// Corresponds to vector forms of USHL and VSHL
            /// </summary>
            Vector64<byte> ShiftLogicalScalar(Vector64<byte> value, Vector64<sbyte> shift);
            Vector64<sbyte> ShiftLogicalScalar(Vector64<sbyte> value, Vector64<sbyte> shift);
            Vector64<short> ShiftLogicalScalar(Vector64<short> value, Vector64<short> shift);
            Vector64<ushort> ShiftLogicalScalar(Vector64<ushort> value, Vector64<short> shift);
            Vector64<int> ShiftLogicalScalar(Vector64<int> value, Vector64<int> shift);
            Vector64<uint> ShiftLogicalScalar(Vector64<uint> value, Vector64<int> shift);

            /// <summary>
            /// Signed Extend Long
            /// For each element result[elem] = value[elem] << shift
            /// Corresponds to vector forms of SXTL
            /// </summary>
            Vector128<short> SignExtendAndWidenLower(Vector64<sbyte> value, byte shift);
            Vector128<int> SignExtendAndWidenLower(Vector64<short> value, byte shift);
            Vector128<long> SignExtendAndWidenLower(Vector64<int> value, byte shift);

            /// <summary>
            /// Unsigned Extend Long
            /// For each element result[elem] = value[elem] << shift
            /// Corresponds to vector forms of UXTL
            /// </summary>
            Vector128<short> ZeroExtendAndWidenLower(Vector64<sbyte> value, byte shift);
            Vector128<ushort> ZeroExtendAndWidenLower(Vector64<byte> value, byte shift);
            Vector128<int> ZeroExtendAndWidenLower(Vector64<short> value, byte shift);
            Vector128<uint> ZeroExtendAndWidenLower(Vector64<ushort> value, byte shift);
            Vector128<long> ZeroExtendAndWidenLower(Vector64<int> value, byte shift);
            Vector128<ulong> ZeroExtendAndWidenLower(Vector64<uint> value, byte shift);

            /// <summary>
            /// Signed Saturating Shift Right Unsigned Narrow Immediate
            /// For each element result[elem] = value[elem] >> shift
            /// Corresponds to vector forms of SQSHRUN
            /// </summary>
            Vector64<byte> ShiftRightArithmeticUnsignedAndNarrowSaturateLower(Vector128<short> value, byte shift);
            Vector64<ushort> ShiftRightArithmeticUnsignedAndNarrowSaturateLower(Vector128<int> value, byte shift);
            Vector64<uint> ShiftRightArithmeticUnsignedAndNarrowSaturateLower(Vector128<long> value, byte shift);

            /// <summary>
            /// Signed Saturating Rounded Shift Right Unsigned Narrow Immediate
            /// For each element result[elem] = value[elem] >> shift
            /// Corresponds to vector forms of SQRSHRUN
            /// </summary>
            Vector64<byte> ShiftRightArithmeticUnsignedAndNarrowRoundedSaturateLower(Vector128<short> value, byte shift);
            Vector64<ushort> ShiftRightArithmeticUnsignedAndNarrowRoundedSaturateLower(Vector128<int> value, byte shift);
            Vector64<uint> ShiftRightArithmeticUnsignedAndNarrowRoundedSaturateLower(Vector128<long> value, byte shift);

            /// <summary>
            /// Shift Right Narrow Immediate
            /// For each element result[elem] = value[elem] >> shift
            /// Corresponds to vector forms of SHRN2
            /// </summary>
            Vector128<sbyte> ShiftRightLogicalAndNarrowUpper(Vector64<sbyte> lower, Vector128<short> value, byte shift);
            Vector128<byte> ShiftRightLogicalAndNarrowUpper(Vector64<byte> lower, Vector128<ushort> value, byte shift);
            Vector128<short> ShiftRightLogicalAndNarrowUpper(Vector64<short> lower, Vector128<int> value, byte shift);
            Vector128<ushort> ShiftRightLogicalAndNarrowUpper(Vector64<ushort> lower, Vector128<uint> value, byte shift);
            Vector128<int> ShiftRightLogicalAndNarrowUpper(Vector64<int> lower, Vector128<long> value, byte shift);
            Vector128<uint> ShiftRightLogicalAndNarrowUpper(Vector64<uint> lower, Vector128<ulong> value, byte shift);

            /// <summary>
            /// Rounding Shift Right Narrow Immediate
            /// For each element result[elem] = (value[elem] + (1 << (shift - 1))) >> shift
            /// Corresponds to vector forms of RSHRN2
            /// </summary>
            Vector128<sbyte> ShiftRightLogicalAndNarrowRoundedUpper(Vector64<sbyte> lower, Vector128<short> value, byte shift);
            Vector128<byte> ShiftRightLogicalAndNarrowRoundedUpper(Vector64<byte> lower, Vector128<ushort> value, byte shift);
            Vector128<short> ShiftRightLogicalAndNarrowRoundedUpper(Vector64<short> lower, Vector128<int> value, byte shift);
            Vector128<ushort> ShiftRightLogicalAndNarrowRoundedUpper(Vector64<ushort> lower, Vector128<uint> value, byte shift);
            Vector128<int> ShiftRightLogicalAndNarrowRoundedUpper(Vector64<int> lower, Vector128<long> value, byte shift);
            Vector128<uint> ShiftRightLogicalAndNarrowRoundedUpper(Vector64<uint> lower, Vector128<ulong> value, byte shift);

            /// <summary>
            /// Shift Left Long
            /// For each element result[elem] = value[elem] << shift
            /// Corresponds to vector forms of SHLL2
            /// </summary>
            Vector128<short> ShiftLeftLogicalAndWidenUpper(Vector128<sbyte> value, byte shift);
            Vector128<ushort> ShiftLeftLogicalAndWidenUpper(Vector128<byte> value, byte shift);
            Vector128<int> ShiftLeftLogicalAndWidenUpper(Vector128<short> value, byte shift);
            Vector128<uint> ShiftLeftLogicalAndWidenUpper(Vector128<ushort> value, byte shift);
            Vector128<long> ShiftLeftLogicalAndWidenUpper(Vector128<int> value, byte shift);
            Vector128<ulong> ShiftLeftLogicalAndWidenUpper(Vector128<uint> value, byte shift);

            /// <summary>
            /// Signed Extend Long
            /// For each element result[elem] = value[elem] << shift
            /// Corresponds to vector forms of SXTL2
            /// </summary>
            Vector128<short> SignExtendAndWidenUpper(Vector128<sbyte> value, byte shift);
            Vector128<int> SignExtendAndWidenUpper(Vector128<short> value, byte shift);
            Vector128<long> SignExtendAndWidenUpper(Vector128<int> value, byte shift);

            /// <summary>
            /// Unsigned Extend Long
            /// For each element result[elem] = value[elem] << shift
            /// Corresponds to vector forms of UXTL2
            /// </summary>
            Vector128<short> ZeroExtendAndWidenUpper(Vector128<sbyte> value, byte shift);
            Vector128<ushort> ZeroExtendAndWidenUpper(Vector128<byte> value, byte shift);
            Vector128<int> ZeroExtendAndWidenUpper(Vector128<short> value, byte shift);
            Vector128<uint> ZeroExtendAndWidenUpper(Vector128<ushort> value, byte shift);
            Vector128<long> ZeroExtendAndWidenUpper(Vector128<int> value, byte shift);
            Vector128<ulong> ZeroExtendAndWidenUpper(Vector128<uint> value, byte shift);

            /// <summary>
            /// Unsigned Saturating Shift Right Narrow Immediate
            /// For each element result[elem] = value[elem] >> shift
            /// Corresponds to vector forms of UQSHRN2
            /// </summary>
            Vector128<sbyte> ShiftRightLogicalAndNarrowSaturateUpper(Vector64<sbyte> lower, Vector128<short> value, byte shift);
            Vector128<byte> ShiftRightLogicalAndNarrowSaturateUpper(Vector64<byte> lower, Vector128<ushort> value, byte shift);
            Vector128<short> ShiftRightLogicalAndNarrowSaturateUpper(Vector64<short> lower, Vector128<int> value, byte shift);
            Vector128<ushort> ShiftRightLogicalAndNarrowSaturateUpper(Vector64<ushort> lower, Vector128<uint> value, byte shift);
            Vector128<int> ShiftRightLogicalAndNarrowSaturateUpper(Vector64<int> lower, Vector128<long> value, byte shift);
            Vector128<uint> ShiftRightLogicalAndNarrowSaturateUpper(Vector64<uint> lower, Vector128<ulong> value, byte shift);

            /// <summary>
            /// Unsigned Saturating Rounded Shift Right Narrow Immediate
            /// For each element result[elem] = value[elem] >> shift
            /// Corresponds to vector forms of UQRSHRN2
            /// </summary>
            Vector128<sbyte> ShiftRightLogicalAndNarrowRoundedSaturateUpper(Vector64<sbyte> lower, Vector128<short> value, byte shift);
            Vector128<byte> ShiftRightLogicalAndNarrowRoundedSaturateUpper(Vector64<byte> lower, Vector128<ushort> value, byte shift);
            Vector128<short> ShiftRightLogicalAndNarrowRoundedSaturateUpper(Vector64<short> lower, Vector128<int> value, byte shift);
            Vector128<ushort> ShiftRightLogicalAndNarrowRoundedSaturateUpper(Vector64<ushort> lower, Vector128<uint> value, byte shift);
            Vector128<int> ShiftRightLogicalAndNarrowRoundedSaturateUpper(Vector64<int> lower, Vector128<long> value, byte shift);
            Vector128<uint> ShiftRightLogicalAndNarrowRoundedSaturateUpper(Vector64<uint> lower, Vector128<ulong> value, byte shift);

            /// <summary>
            /// Signed Saturating Shift Right Narrow Immediate
            /// For each element result[elem] = value[elem] >> shift
            /// Corresponds to vector forms of SQSHRN2
            /// </summary>
            Vector128<sbyte> ShiftRightArithmeticAndNarrowSaturateUpper(Vector64<sbyte> lower, Vector128<short> value, byte shift);
            Vector128<short> ShiftRightArithmeticAndNarrowSaturateUpper(Vector64<short> lower, Vector128<int> value, byte shift);
            Vector128<int> ShiftRightArithmeticAndNarrowSaturateUpper(Vector64<int> lower, Vector128<long> value, byte shift);

            /// <summary>
            /// Signed Saturating Rounded Shift Right Narrow Immediate
            /// For each element result[elem] = value[elem] >> shift
            /// Corresponds to vector forms of SQRSHRN2
            /// </summary>
            Vector128<sbyte> ShiftRightArithmeticAndNarrowRoundedSaturateUpper(Vector64<sbyte> lower, Vector128<short> value, byte shift);
            Vector128<short> ShiftRightArithmeticAndNarrowRoundedSaturateUpper(Vector64<short> lower, Vector128<int> value, byte shift);
            Vector128<int> ShiftRightArithmeticAndNarrowRoundedSaturateUpper(Vector64<int> lower, Vector128<long> value, byte shift);

            /// <summary>
            /// Signed Saturating Shift Right Unsigned Narrow Immediate
            /// For each element result[elem] = value[elem] >> shift
            /// Corresponds to vector forms of SQSHRUN2
            /// </summary>
            Vector128<byte> ShiftRightArithmeticUnsignedAndNarrowSaturateUpper(Vector64<sbyte> lower, Vector128<short> value, byte shift);
            Vector128<ushort> ShiftRightArithmeticUnsignedAndNarrowSaturateUpper(Vector64<short> lower, Vector128<int> value, byte shift);
            Vector128<uint> ShiftRightArithmeticUnsignedAndNarrowSaturateUpper(Vector64<int> lower, Vector128<long> value, byte shift);

            /// <summary>
            /// Signed Saturating Rounded Shift Right Unsigned Narrow Immediate
            /// For each element result[elem] = value[elem] >> shift
            /// Corresponds to vector forms of SQRSHRUN2
            /// </summary>
            Vector128<byte> ShiftRightArithmeticUnsignedAndNarrowRoundedSaturateUpper(Vector64<sbyte> lower, Vector128<short> value, byte shift);
            Vector128<ushort> ShiftRightArithmeticUnsignedAndNarrowRoundedSaturateUpper(Vector64<short> lower, Vector128<int> value, byte shift);
            Vector128<uint> ShiftRightArithmeticUnsignedAndNarrowRoundedSaturateUpper(Vector64<int> lower, Vector128<long> value, byte shift);
        }
    }
}
api-approved arch-arm64 area-System.Runtime.Intrinsics

Most helpful comment

Is this because on ARM32 the SIMD & Floating-Point Register File has double the number of V64 registers and those registers map to both halves of the V128 registers? This allows each half to be individually addressed and so SHLL2, for example, can be implemented by treating the given V128 as a V64 and doing +1 on the register index. It also differs from ARM64 where the number of V64 and V128 registers is the same and a V64 only covers the lower half of a V128. Is that correct?

Yup that's exactly it, this also extends downwards to the instructions taking a 32-bit VFP register in AArch32. In a lot of the earlier intrinsics we didn't use this characteristic so they ended up being missing. We have them for the newer intrinsics but the older ones need to be updated.

All 23 comments

CC. @CarolEidt, @echesakovMSFT, @TamarChristinaArm

This is a proposal for some of the remaining intrinsics that aren't yet in an existing proposal. I still have a few more to add to the list, but I wanted to go ahead and get this up while I worked on those.

btw, @tannergooding is there a roadmap for arm intrinsic somewhere?

There's this, but it's out of date: https://github.com/dotnet/runtime/blob/master/docs/design/features/arm64-intrinsics.md

I'm curious about whether the LD[234] and ST[234] will make it in for 5.0

There's this, but it's out of date: https://github.com/dotnet/runtime/blob/master/docs/design/features/arm64-intrinsics.md
I'm curious about whether the LD[234] and ST[234] will make it in for 5.0

@saucecontrol I am working right now to support LD[234] and ST[234] (and also LD[1234]R) in the JIT emitter. However, it's only a small fraction of the work needed to fully support register lists in hardware intrinsics. In particular, LSRA also needs to be able to allocate spans of registers, and this is a much harder problem (this is mentioned in https://github.com/dotnet/runtime/blob/master/docs/design/features/arm64-intrinsics.md#lsra-changes-to-allocate-contiguous-register-ranges).

Finished updating with the remaining shift intrinsics.

@tannergooding Just a general question, is there an easy way to go from Vector128<T> to Vector64<T>? Would just an explicit cast work? The reason I'm asking is that we noticed that for some of the intrinsics it's useful to have the Vector64<T> variant. Like for ShiftLeftLogicalAndWidenLow where doing so with a shift of 0 would make it a widening of the low part of the vector but your input may be a Vector128<T>. In C we have to use an awful workaround for this.

Just a general question, is there an easy way to go from Vector128 to Vector64

Yes, you can just do value.GetLower() or value.GetUpper().
Likewise, in the other direction you can do value.ToVector128() or value.ToVector128Unsafe()

These will be special-cased in the JIT to be zero cost where possible (and minimal cost for the target platform otherwise).

Updated the proposal based on naming decisions made in https://github.com/dotnet/runtime/issues/32512.
Moved several of the *Lower variants down to AdvSimd (from AdvSimd.Arm64) based on https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics indicating they are also available on A32
Added several *Scalar variants for long/ulong to AdvSimd based on https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics indicating they are also available on A32 and A64

@tannergooding I believe the ACLE spec may be incorrect on a few of these. e.g. SHLL2 and SHRN2 are implementable on AArch32 for instance. Can I get a few days to go through these and make corrections to the spec and have them checked?

@tannergooding I believe the ACLE spec may be incorrect on a few of these. e.g. SHLL2 and SHRN2 are implementable on AArch32 for instance. Can I get a few days to go through these and make corrections to the spec and have them checked?

Is this because on ARM32 the SIMD & Floating-Point Register File has double the number of V64 registers and those registers map to both halves of the V128 registers? This allows each half to be individually addressed and so SHLL2, for example, can be implemented by treating the given V128 as a V64 and doing +1 on the register index. It also differs from ARM64 where the number of V64 and V128 registers is the same and a V64 only covers the lower half of a V128. Is that correct?

And feel free to update this issue or any of the others (or open an issue covering any already exposed ISAs) and we can get that updated as appropriate. @terrajobst and @dotnet/fxdc can likely confirm, but we shouldn't need an actual review session to move APIs from AdvSimd.Arm64 to AdvSimd if they actually apply to both architectures (and the relevant APIs have already been reviewed/approved).

Is this because on ARM32 the SIMD & Floating-Point Register File has double the number of V64 registers and those registers map to both halves of the V128 registers? This allows each half to be individually addressed and so SHLL2, for example, can be implemented by treating the given V128 as a V64 and doing +1 on the register index. It also differs from ARM64 where the number of V64 and V128 registers is the same and a V64 only covers the lower half of a V128. Is that correct?

Yup that's exactly it, this also extends downwards to the instructions taking a 32-bit VFP register in AArch32. In a lot of the earlier intrinsics we didn't use this characteristic so they ended up being missing. We have them for the newer intrinsics but the older ones need to be updated.

Just noticed that you have already discussed this :-)

@tannergooding Is it right to say that SSHL with negative shift value is going to act as arithmetic right shift while USHL as logical right shift?

Yes, that was my understanding and the naming I went with reflects that.

That is, if it is explicitly named ShiftLeft* then it will always do a left shift, same with ShiftRight with right shift.
All left shifts are the same and "logical" vs "arithmetic" doesn't necessarily apply, it always inserts zero
While right shifts can be "logical" (insert zero) or "arithmetic" (carry the sign)

If it doesn't specify Left or Right then it takes a Vector as the shift amount and applies it on a per element basis.
SSHL is a "signed shift left" and so if the value is positive it will be a ShiftLeftLogical and if the value is negative it will be a ShiftRightArithmetic
Thus the name for the API is ShiftArithmetic

@TamarChristinaArm might be able to confirm my understanding?

@tannergooding Yeah that's correct. I don't know if you would like to reflect in the name that it's a truncating right shift? since we explicitly put "Rounding" in the rounding ones?

I don't know if you would like to reflect in the name that it's a truncating right shift?

I don't think this is necessary. "truncating" is the default behavior of shifts in .NET (and many languages) and it will likely be less confusing to just call it ShiftLogical and ShiftArithmetic

I went through most of the intrinsics here.

  1. There are no corresponding instructions for the following and these should be removed from the proposal
    ```c#
    Vector64<sbyte> ShiftArithmeticRoundedScalar(Vector64<sbyte> value, Vector64<sbyte> shift);
    Vector64<short> ShiftArithmeticRoundedScalar(Vector64<short> value, Vector64<short> shift);
    Vector64<int> ShiftArithmeticRoundedScalar(Vector64<int> value, Vector64<int> shift);

        Vector64<sbyte> ShiftArithmeticScalar(Vector64<sbyte> value, Vector64<sbyte> shift);
        Vector64<short> ShiftArithmeticScalar(Vector64<short> value, Vector64<short> shift);
        Vector64<int> ShiftArithmeticScalar(Vector64<int> value, Vector64<int> shift);
    
    
        Vector64<byte> ShiftLogicalRoundedScalar(Vector64<byte> value, Vector64<sbyte> shift);
        Vector64<sbyte> ShiftLogicalRoundedScalar(Vector64<sbyte> value, Vector64<sbyte> shift);
        Vector64<short> ShiftLogicalRoundedScalar(Vector64<short> value, Vector64<short> shift);
        Vector64<ushort> ShiftLogicalRoundedScalar(Vector64<ushort> value, Vector64<short> shift);
        Vector64<int> ShiftLogicalRoundedScalar(Vector64<int> value, Vector64<int> shift);
        Vector64<uint> ShiftLogicalRoundedScalar(Vector64<uint> value, Vector64<int> shift);
    
        Vector64<byte> ShiftLogicalScalar(Vector64<byte> value, Vector64<sbyte> shift);
        Vector64<sbyte> ShiftLogicalScalar(Vector64<sbyte> value, Vector64<sbyte> shift);
        Vector64<short> ShiftLogicalScalar(Vector64<short> value, Vector64<short> shift);
        Vector64<ushort> ShiftLogicalScalar(Vector64<ushort> value, Vector64<short> shift);
        Vector64<int> ShiftLogicalScalar(Vector64<int> value, Vector64<int> shift);
        Vector64<uint> ShiftLogicalScalar(Vector64<uint> value, Vector64<int> shift);
    
2. The following are missing
```c#
// Scalar saturating narrowing right-shift overloads reported as missing from the proposal.
// Each declaration is preceded by a C-style intrinsic signature and the A64 instruction form it maps to.
// Unlike the proposal itself, these use "count" for the immediate and carry "public static" modifiers.
class AdvSimd.Arm64
{
  // int16_t vqrshrns_n_s32 (int32_t a, const int n)
  //   A64: SQRSHRN Hd, Sn, #n
  public static Vector64<short> ShiftRightArithmeticAndNarrowRoundedSaturateScalar(Vector64<int> value, byte count);

  // int32_t vqrshrnd_n_s64 (int64_t a, const int n)
  //   A64: SQRSHRN Sd, Dn, #n
  public static Vector64<int> ShiftRightArithmeticAndNarrowRoundedSaturateScalar(Vector64<long> value, byte count);

  // int8_t vqrshrnh_n_s16 (int16_t a, const int n)
  //   A64: SQRSHRN Bd, Hn, #n
  public static Vector64<sbyte> ShiftRightArithmeticAndNarrowRoundedSaturateScalar(Vector64<short> value, byte count);

  // int16_t vqshrns_n_s32 (int32_t a, const int n)
  //   A64: SQSHRN Hd, Sn, #n
  public static Vector64<short> ShiftRightArithmeticAndNarrowSaturateScalar(Vector64<int> value, byte count);

  // int32_t vqshrnd_n_s64 (int64_t a, const int n)
  //   A64: SQSHRN Sd, Dn, #n
  public static Vector64<int> ShiftRightArithmeticAndNarrowSaturateScalar(Vector64<long> value, byte count);

  // int8_t vqshrnh_n_s16 (int16_t a, const int n)
  //   A64: SQSHRN Bd, Hn, #n
  public static Vector64<sbyte> ShiftRightArithmeticAndNarrowSaturateScalar(Vector64<short> value, byte count);

  // uint8_t vqrshrunh_n_s16 (int16_t a, const int n)
  //   A64: SQRSHRUN Bd, Hn, #n
  public static Vector64<byte> ShiftRightArithmeticUnsignedAndNarrowRoundedSaturateScalar(Vector64<short> value, byte count);

  // uint16_t vqrshruns_n_s32 (int32_t a, const int n)
  //   A64: SQRSHRUN Hd, Sn, #n
  public static Vector64<ushort> ShiftRightArithmeticUnsignedAndNarrowRoundedSaturateScalar(Vector64<int> value, byte count);

  // uint32_t vqrshrund_n_s64 (int64_t a, const int n)
  //   A64: SQRSHRUN Sd, Dn, #n
  public static Vector64<uint> ShiftRightArithmeticUnsignedAndNarrowRoundedSaturateScalar(Vector64<long> value, byte count);

  // uint8_t vqshrunh_n_s16 (int16_t a, const int n)
  //   A64: SQSHRUN Bd, Hn, #n
  public static Vector64<byte> ShiftRightArithmeticUnsignedAndNarrowSaturateScalar(Vector64<short> value, byte count);

  // uint16_t vqshruns_n_s32 (int32_t a, const int n)
  //   A64: SQSHRUN Hd, Sn, #n
  public static Vector64<ushort> ShiftRightArithmeticUnsignedAndNarrowSaturateScalar(Vector64<int> value, byte count);

  // uint32_t vqshrund_n_s64 (int64_t a, const int n)
  //   A64: SQSHRUN Sd, Dn, #n
  public static Vector64<uint> ShiftRightArithmeticUnsignedAndNarrowSaturateScalar(Vector64<long> value, byte count);

  // uint8_t vqrshrnh_n_u16 (uint16_t a, const int n)
  //   A64: UQRSHRN Bd, Hn, #n
  public static Vector64<byte> ShiftRightLogicalAndNarrowRoundedSaturateScalar(Vector64<ushort> value, byte count);

  // uint16_t vqrshrns_n_u32 (uint32_t a, const int n)
  //   A64: UQRSHRN Hd, Sn, #n
  public static Vector64<ushort> ShiftRightLogicalAndNarrowRoundedSaturateScalar(Vector64<uint> value, byte count);

  // uint32_t vqrshrnd_n_u64 (uint64_t a, const int n)
  //   A64: UQRSHRN Sd, Dn, #n
  public static Vector64<uint> ShiftRightLogicalAndNarrowRoundedSaturateScalar(Vector64<ulong> value, byte count);

  // uint8_t vqshrnh_n_u16 (uint16_t a, const int n)
  //   A64: UQSHRN Bd, Hn, #n
  public static Vector64<byte> ShiftRightLogicalAndNarrowSaturateScalar(Vector64<ushort> value, byte count);

  // uint16_t vqshrns_n_u32 (uint32_t a, const int n)
  //   A64: UQSHRN Hd, Sn, #n
  public static Vector64<ushort> ShiftRightLogicalAndNarrowSaturateScalar(Vector64<uint> value, byte count);

  // uint32_t vqshrnd_n_u64 (uint64_t a, const int n)
  //   A64: UQSHRN Sd, Dn, #n
  public static Vector64<uint> ShiftRightLogicalAndNarrowSaturateScalar(Vector64<ulong> value, byte count);
}
  1. The following should not accept an immediate operand. The underlying instructions are the aliases for SSHLL{2} and USHLL{2} with zero immediate and I am wondering if we really need them.
    ```c#
    /// <summary>
    /// Signed Extend Long
    /// For each element result[elem] = value[elem] << shift
    /// Corresponds to vector forms of SXTL
    /// </summary>
    Vector128<short> SignExtendAndWidenLower(Vector64<sbyte> value, byte shift);
    Vector128<int> SignExtendAndWidenLower(Vector64<short> value, byte shift);
    Vector128<long> SignExtendAndWidenLower(Vector64<int> value, byte shift);

        /// <summary>
        /// Unsigned Extend Long
        /// For each element result[elem] = value[elem] << shift
        /// Corresponds to vector forms of UXTL
        /// </summary>
        Vector128<short> ZeroExtendAndWidenLower(Vector64<sbyte> value, byte shift);
        Vector128<ushort> ZeroExtendAndWidenLower(Vector64<byte> value, byte shift);
        Vector128<int> ZeroExtendAndWidenLower(Vector64<short> value, byte shift);
        Vector128<uint> ZeroExtendAndWidenLower(Vector64<ushort> value, byte shift);
        Vector128<long> ZeroExtendAndWidenLower(Vector64<int> value, byte shift);
        Vector128<ulong> ZeroExtendAndWidenLower(Vector64<uint> value, byte shift);
    
        /// <summary>
        /// Signed Extend Long
        /// For each element result[elem] = value[elem] << shift
        /// Corresponds to vector forms of SXTL2
        /// </summary>
        Vector128<short> SignExtendAndWidenUpper(Vector128<sbyte> value, byte shift);
        Vector128<int> SignExtendAndWidenUpper(Vector128<short> value, byte shift);
        Vector128<long> SignExtendAndWidenUpper(Vector128<int> value, byte shift);
    
        /// <summary>
        /// Unsigned Extend Long
        /// For each element result[elem] = value[elem] << shift
        /// Corresponds to vector forms of UXTL2
        /// </summary>
        Vector128<short> ZeroExtendAndWidenUpper(Vector128<sbyte> value, byte shift);
        Vector128<ushort> ZeroExtendAndWidenUpper(Vector128<byte> value, byte shift);
        Vector128<int> ZeroExtendAndWidenUpper(Vector128<short> value, byte shift);
        Vector128<uint> ZeroExtendAndWidenUpper(Vector128<ushort> value, byte shift);
        Vector128<long> ZeroExtendAndWidenUpper(Vector128<int> value, byte shift);
        Vector128<ulong> ZeroExtendAndWidenUpper(Vector128<uint> value, byte shift);
    

    ```

  2. As far as I understood, *Upper intrinsics are implementable on A32 and should be under common Arm class. The only intrinsics that should be under Arm64 are *Saturate intrinsics that operate on {u}int8_t, {u}int16_t and {u}int32_t.

  3. I am wondering if we can avoid blowing up the API and define the ShiftLogical intrinsics only for unsigned types, the ShiftArithmetic intrinsics only for signed types, and if a user needs to do a logical right shift on signed types it would still be possible with .As().

  4. On x64 we use count instead of shift - should we keep the name?

cc @TamarChristinaArm @tannergooding

There are no corresponding instructions for the following and these should be removed from the proposal

Ah, I missed the if S == '0' && size != '11'

The following should not accept an immediate operand. The underlying instructions are the aliases for SSHLL{2} and USHLL{2} with zero immediate and I am wondering if we really need them.

That sounds reasonable, we can also revisit and provide convenience overloads later if it is an issue

As far as I understood, *Upper intrinsics are implementable on A32 and should be under common Arm class. The only intrinsics that should be under Arm64 are *Saturate intrinsics that operate on {u}int8_t, {u}int16_t and {u}int32_t.

Yes. This was done before we started updating the other proposals to account for this.

I am wondering if we can avoid blowing up the API and define the ShiftLogical intrinsics only for unsigned types, the ShiftArithmetic intrinsics only for signed types and if a user needs to do logical right shift on signed types it still be possible with .As().

We might be able to do this, but should discuss it more in API review.

On x64 we use count instead of shift - should we keep the name?

I'm fine with using count for consistency

Video

  • We should rename the shift parameter to count
  • What happens when the vector-based counts are too large?
  • Some of the upper variants under Arm64 seem to belong to AdvSimd
  • Do we need SignExtendAndWidenLower, ZeroExtendAndWidenLower, SignExtendAndWidenUpper, ZeroExtendAndWidenUpper at all?

    • If we expose them, we need to drop the count parameter

  • Should we change the encoding of methods and instead of making up Markov-chain-like method names take arguments (e.g. bool round or SomeFlags flags)? The concern is metadata size.

```C#
namespace System.Runtime.Intrinsics.Arm
{
public abstract class AdvSimd
{
    /// <summary>
    /// Unsigned Shift Left
    /// For each element result[elem] = value[elem] << (count[elem] & 0xFF)
    /// Corresponds to vector forms of USHL and VSHL
    /// </summary>
    Vector64<byte> ShiftLogical(Vector64<byte> value, Vector64<sbyte> count);
    Vector64<sbyte> ShiftLogical(Vector64<sbyte> value, Vector64<sbyte> count);
    Vector64<short> ShiftLogical(Vector64<short> value, Vector64<short> count);
    Vector64<ushort> ShiftLogical(Vector64<ushort> value, Vector64<short> count);
    Vector64<int> ShiftLogical(Vector64<int> value, Vector64<int> count);
    Vector64<uint> ShiftLogical(Vector64<uint> value, Vector64<int> count);
    Vector128<byte> ShiftLogical(Vector128<byte> value, Vector128<sbyte> count);
    Vector128<sbyte> ShiftLogical(Vector128<sbyte> value, Vector128<sbyte> count);
    Vector128<short> ShiftLogical(Vector128<short> value, Vector128<short> count);
    Vector128<ushort> ShiftLogical(Vector128<ushort> value, Vector128<short> count);
    Vector128<int> ShiftLogical(Vector128<int> value, Vector128<int> count);
    Vector128<uint> ShiftLogical(Vector128<uint> value, Vector128<int> count);
    Vector128<long> ShiftLogical(Vector128<long> value, Vector128<long> count);
    Vector128<ulong> ShiftLogical(Vector128<ulong> value, Vector128<long> count);

    Vector64<long> ShiftLogicalScalar(Vector64<long> value, Vector64<long> count);
    Vector64<ulong> ShiftLogicalScalar(Vector64<ulong> value, Vector64<long> count);

    /// <summary>
    /// Unsigned Rounding Shift Left
    /// For each element result[elem] = (value[elem] + (1 << (-(shift[elem] & 0xFF) - 1))) << (shift[elem] & 0xFF)
    /// Corresponds to vector forms of URSHL and VRSHL
    /// </summary>
    Vector64<byte> ShiftLogicalRounded(Vector64<byte> value, Vector64<sbyte> count);
    Vector64<sbyte> ShiftLogicalRounded(Vector64<sbyte> value, Vector64<sbyte> count);
    Vector64<short> ShiftLogicalRounded(Vector64<short> value, Vector64<short> count);
    Vector64<ushort> ShiftLogicalRounded(Vector64<ushort> value, Vector64<short> count);
    Vector64<int> ShiftLogicalRounded(Vector64<int> value, Vector64<int> count);
    Vector64<uint> ShiftLogicalRounded(Vector64<uint> value, Vector64<int> count);
    Vector128<byte> ShiftLogicalRounded(Vector128<byte> value, Vector128<sbyte> count);
    Vector128<sbyte> ShiftLogicalRounded(Vector128<sbyte> value, Vector128<sbyte> count);
    Vector128<short> ShiftLogicalRounded(Vector128<short> value, Vector128<short> count);
    Vector128<ushort> ShiftLogicalRounded(Vector128<ushort> value, Vector128<short> count);
    Vector128<int> ShiftLogicalRounded(Vector128<int> value, Vector128<int> count);
    Vector128<uint> ShiftLogicalRounded(Vector128<uint> value, Vector128<int> count);
    Vector128<long> ShiftLogicalRounded(Vector128<long> value, Vector128<long> count);
    Vector128<ulong> ShiftLogicalRounded(Vector128<ulong> value, Vector128<long> count);

    Vector64<long> ShiftLogicalRoundedScalar(Vector64<long> value, Vector64<long> count);
    Vector64<ulong> ShiftLogicalRoundedScalar(Vector64<ulong> value, Vector64<long> count);

    /// <summary>
    /// Signed Shift Left
    /// For each element result[elem] = value[elem] << (shift[elem] & 0xFF)
    /// Corresponds to vector forms of SSHL and VSHL
    /// </summary>
    Vector64<sbyte> ShiftArithmetic(Vector64<sbyte> value, Vector64<sbyte> count);
    Vector64<short> ShiftArithmetic(Vector64<short> value, Vector64<short> count);
    Vector64<int> ShiftArithmetic(Vector64<int> value, Vector64<int> count);
    Vector128<sbyte> ShiftArithmetic(Vector128<sbyte> value, Vector128<sbyte> count);
    Vector128<short> ShiftArithmetic(Vector128<short> value, Vector128<short> count);
    Vector128<int> ShiftArithmetic(Vector128<int> value, Vector128<int> count);
    Vector128<long> ShiftArithmetic(Vector128<long> value, Vector128<long> count);

    Vector64<long> ShiftArithmeticScalar(Vector64<long> value, Vector64<long> count);

    /// <summary>
    /// Signed Rounding Shift Left
    /// For each element result[elem] = (value[elem] + (1 << (-(shift[elem] & 0xFF) - 1))) << (shift[elem] & 0xFF)
    /// Corresponds to vector forms of SRSHL and VRSHL
    /// </summary>
    Vector64<sbyte> ShiftArithmeticRounded(Vector64<sbyte> value, Vector64<sbyte> count);
    Vector64<short> ShiftArithmeticRounded(Vector64<short> value, Vector64<short> count);
    Vector64<int> ShiftArithmeticRounded(Vector64<int> value, Vector64<int> count);
    Vector128<sbyte> ShiftArithmeticRounded(Vector128<sbyte> value, Vector128<sbyte> count);
    Vector128<short> ShiftArithmeticRounded(Vector128<short> value, Vector128<short> count);
    Vector128<int> ShiftArithmeticRounded(Vector128<int> value, Vector128<int> count);
    Vector128<long> ShiftArithmeticRounded(Vector128<long> value, Vector128<long> count);

    Vector64<long> ShiftArithmeticRoundedScalar(Vector64<long> value, Vector64<long> count);

    /// <summary>
    /// Unsigned Saturating Shift Left
    /// For each element result[elem] = value[elem] << (count[elem] & 0xFF)
    /// Corresponds to vector forms of UQSHL and VQSHL
    /// </summary>
    Vector64<byte> ShiftLogicalSaturate(Vector64<byte> value, Vector64<sbyte> count);
    Vector64<sbyte> ShiftLogicalSaturate(Vector64<sbyte> value, Vector64<sbyte> count);
    Vector64<short> ShiftLogicalSaturate(Vector64<short> value, Vector64<short> count);
    Vector64<ushort> ShiftLogicalSaturate(Vector64<ushort> value, Vector64<short> count);
    Vector64<int> ShiftLogicalSaturate(Vector64<int> value, Vector64<int> count);
    Vector64<uint> ShiftLogicalSaturate(Vector64<uint> value, Vector64<int> count);
    Vector128<byte> ShiftLogicalSaturate(Vector128<byte> value, Vector128<sbyte> count);
    Vector128<sbyte> ShiftLogicalSaturate(Vector128<sbyte> value, Vector128<sbyte> count);
    Vector128<short> ShiftLogicalSaturate(Vector128<short> value, Vector128<short> count);
    Vector128<ushort> ShiftLogicalSaturate(Vector128<ushort> value, Vector128<short> count);
    Vector128<int> ShiftLogicalSaturate(Vector128<int> value, Vector128<int> count);
    Vector128<uint> ShiftLogicalSaturate(Vector128<uint> value, Vector128<int> count);
    Vector128<long> ShiftLogicalSaturate(Vector128<long> value, Vector128<long> count);
    Vector128<ulong> ShiftLogicalSaturate(Vector128<ulong> value, Vector128<long> count);

    Vector64<long> ShiftLogicalSaturateScalar(Vector64<long> value, Vector64<long> count);
    Vector64<ulong> ShiftLogicalSaturateScalar(Vector64<ulong> value, Vector64<long> count);

    /// <summary>
    /// Unsigned Saturating Rounding Shift Left
    /// For each element result[elem] = (value[elem] + (1 << (-(count[elem] & 0xFF) - 1))) << (count[elem] & 0xFF)
    /// Corresponds to vector forms of UQRSHL and VQRSHL
    /// </summary>
    Vector64<byte> ShiftLogicalRoundedSaturate(Vector64<byte> value, Vector64<sbyte> count);
    Vector64<sbyte> ShiftLogicalRoundedSaturate(Vector64<sbyte> value, Vector64<sbyte> count);
    Vector64<short> ShiftLogicalRoundedSaturate(Vector64<short> value, Vector64<short> count);
    Vector64<ushort> ShiftLogicalRoundedSaturate(Vector64<ushort> value, Vector64<short> count);
    Vector64<int> ShiftLogicalRoundedSaturate(Vector64<int> value, Vector64<int> count);
    Vector64<uint> ShiftLogicalRoundedSaturate(Vector64<uint> value, Vector64<int> count);
    Vector128<byte> ShiftLogicalRoundedSaturate(Vector128<byte> value, Vector128<sbyte> count);
    Vector128<sbyte> ShiftLogicalRoundedSaturate(Vector128<sbyte> value, Vector128<sbyte> count);
    Vector128<short> ShiftLogicalRoundedSaturate(Vector128<short> value, Vector128<short> count);
    Vector128<ushort> ShiftLogicalRoundedSaturate(Vector128<ushort> value, Vector128<short> count);
    Vector128<int> ShiftLogicalRoundedSaturate(Vector128<int> value, Vector128<int> count);
    Vector128<uint> ShiftLogicalRoundedSaturate(Vector128<uint> value, Vector128<int> count);
    Vector128<long> ShiftLogicalRoundedSaturate(Vector128<long> value, Vector128<long> count);
    Vector128<ulong> ShiftLogicalRoundedSaturate(Vector128<ulong> value, Vector128<long> count);

    Vector64<long> ShiftLogicalRoundedSaturateScalar(Vector64<long> value, Vector64<long> count);
    Vector64<ulong> ShiftLogicalRoundedSaturateScalar(Vector64<ulong> value, Vector64<long> count);

    /// <summary>
    /// Signed Saturating Shift Left
    /// For each element result[elem] = value[elem] << (count[elem] & 0xFF)
    /// Corresponds to vector forms of SQSHL and VQSHL
    /// </summary>
    Vector64<sbyte> ShiftArithmeticSaturate(Vector64<sbyte> value, Vector64<sbyte> count);
    Vector64<short> ShiftArithmeticSaturate(Vector64<short> value, Vector64<short> count);
    Vector64<int> ShiftArithmeticSaturate(Vector64<int> value, Vector64<int> count);
    Vector128<sbyte> ShiftArithmeticSaturate(Vector128<sbyte> value, Vector128<sbyte> count);
    Vector128<short> ShiftArithmeticSaturate(Vector128<short> value, Vector128<short> count);
    Vector128<int> ShiftArithmeticSaturate(Vector128<int> value, Vector128<int> count);
    Vector128<long> ShiftArithmeticSaturate(Vector128<long> value, Vector128<long> count);

    Vector64<long> ShiftArithmeticSaturateScalar(Vector64<long> value, Vector64<long> count);

    /// <summary>
    /// Signed Saturating Rounding Shift Left
    /// For each element result[elem] = (value[elem] + (1 << (-(count[elem] & 0xFF) - 1))) << (count[elem] & 0xFF)
    /// Corresponds to vector forms of SQRSHL and VQRSHL
    /// </summary>
    Vector64<sbyte> ShiftArithmeticRoundedSaturate(Vector64<sbyte> value, Vector64<sbyte> count);
    Vector64<short> ShiftArithmeticRoundedSaturate(Vector64<short> value, Vector64<short> count);
    Vector64<int> ShiftArithmeticRoundedSaturate(Vector64<int> value, Vector64<int> count);
    Vector128<sbyte> ShiftArithmeticRoundedSaturate(Vector128<sbyte> value, Vector128<sbyte> count);
    Vector128<short> ShiftArithmeticRoundedSaturate(Vector128<short> value, Vector128<short> count);
    Vector128<int> ShiftArithmeticRoundedSaturate(Vector128<int> value, Vector128<int> count);
    Vector128<long> ShiftArithmeticRoundedSaturate(Vector128<long> value, Vector128<long> count);

    Vector64<long> ShiftArithmeticRoundedSaturateScalar(Vector64<long> value, Vector64<long> count);

    /// <summary>
    /// Shift Left Immediate
    /// For each element result[elem] = value[elem] << shift
    /// Corresponds to vector forms of SHL and VSHL
    /// </summary>
    Vector64<byte> ShiftLeftLogical(Vector64<byte> value, byte count);
    Vector64<sbyte> ShiftLeftLogical(Vector64<sbyte> value, byte count);
    Vector64<short> ShiftLeftLogical(Vector64<short> value, byte count);
    Vector64<ushort> ShiftLeftLogical(Vector64<ushort> value, byte count);
    Vector64<int> ShiftLeftLogical(Vector64<int> value, byte count);
    Vector64<uint> ShiftLeftLogical(Vector64<uint> value, byte count);
    Vector128<byte> ShiftLeftLogical(Vector128<byte> value, byte count);
    Vector128<sbyte> ShiftLeftLogical(Vector128<sbyte> value, byte count);
    Vector128<short> ShiftLeftLogical(Vector128<short> value, byte count);
    Vector128<ushort> ShiftLeftLogical(Vector128<ushort> value, byte count);
    Vector128<int> ShiftLeftLogical(Vector128<int> value, byte count);
    Vector128<uint> ShiftLeftLogical(Vector128<uint> value, byte count);
    Vector128<long> ShiftLeftLogical(Vector128<long> value, byte count);
    Vector128<ulong> ShiftLeftLogical(Vector128<ulong> value, byte count);

    Vector64<long> ShiftLeftLogicalScalar(Vector64<long> value, byte count);
    Vector64<ulong> ShiftLeftLogicalScalar(Vector64<ulong> value, byte count);

    /// <summary>
    /// Unsigned Shift Right Immediate
    /// For each element result[elem] = value[elem] >> shift
    /// Corresponds to vector forms of USHR and VSHR
    /// </summary>
    Vector64<byte> ShiftRightLogical(Vector64<byte> value, byte count);
    Vector64<sbyte> ShiftRightLogical(Vector64<sbyte> value, byte count);
    Vector64<short> ShiftRightLogical(Vector64<short> value, byte count);
    Vector64<ushort> ShiftRightLogical(Vector64<ushort> value, byte count);
    Vector64<int> ShiftRightLogical(Vector64<int> value, byte count);
    Vector64<uint> ShiftRightLogical(Vector64<uint> value, byte count);
    Vector128<byte> ShiftRightLogical(Vector128<byte> value, byte count);
    Vector128<sbyte> ShiftRightLogical(Vector128<sbyte> value, byte count);
    Vector128<short> ShiftRightLogical(Vector128<short> value, byte count);
    Vector128<ushort> ShiftRightLogical(Vector128<ushort> value, byte count);
    Vector128<int> ShiftRightLogical(Vector128<int> value, byte count);
    Vector128<uint> ShiftRightLogical(Vector128<uint> value, byte count);
    Vector128<long> ShiftRightLogical(Vector128<long> value, byte count);
    Vector128<ulong> ShiftRightLogical(Vector128<ulong> value, byte count);

    Vector64<long> ShiftRightLogicalScalar(Vector64<long> value, byte count);
    Vector64<ulong> ShiftRightLogicalScalar(Vector64<ulong> value, byte count);

    /// <summary>
    /// Unsigned Rounding Shift Right Immediate
    /// For each element result[elem] = (value[elem] + (1 << (shift - 1))) >> shift
    /// Corresponds to vector forms of URSHR and VRSHR
    /// </summary>
    Vector64<byte> ShiftRightLogicalRounded(Vector64<byte> value, byte count);
    Vector64<sbyte> ShiftRightLogicalRounded(Vector64<sbyte> value, byte count);
    Vector64<short> ShiftRightLogicalRounded(Vector64<short> value, byte count);
    Vector64<ushort> ShiftRightLogicalRounded(Vector64<ushort> value, byte count);
    Vector64<int> ShiftRightLogicalRounded(Vector64<int> value, byte count);
    Vector64<uint> ShiftRightLogicalRounded(Vector64<uint> value, byte count);
    Vector128<byte> ShiftRightLogicalRounded(Vector128<byte> value, byte count);
    Vector128<sbyte> ShiftRightLogicalRounded(Vector128<sbyte> value, byte count);
    Vector128<short> ShiftRightLogicalRounded(Vector128<short> value, byte count);
    Vector128<ushort> ShiftRightLogicalRounded(Vector128<ushort> value, byte count);
    Vector128<int> ShiftRightLogicalRounded(Vector128<int> value, byte count);
    Vector128<uint> ShiftRightLogicalRounded(Vector128<uint> value, byte count);
    Vector128<long> ShiftRightLogicalRounded(Vector128<long> value, byte count);
    Vector128<ulong> ShiftRightLogicalRounded(Vector128<ulong> value, byte count);

    Vector64<long> ShiftRightLogicalRoundedScalar(Vector64<long> value, byte count);
    Vector64<ulong> ShiftRightLogicalRoundedScalar(Vector64<ulong> value, byte count);

    /// <summary>
    /// Signed Shift Right Immediate
    /// For each element result[elem] = value[elem] >> shift
    /// Corresponds to vector forms of SSHR and VSHR
    /// </summary>
    Vector64<sbyte> ShiftRightArithmetic(Vector64<sbyte> value, byte count);
    Vector64<short> ShiftRightArithmetic(Vector64<short> value, byte count);
    Vector64<int> ShiftRightArithmetic(Vector64<int> value, byte count);
    Vector128<sbyte> ShiftRightArithmetic(Vector128<sbyte> value, byte count);
    Vector128<short> ShiftRightArithmetic(Vector128<short> value, byte count);
    Vector128<int> ShiftRightArithmetic(Vector128<int> value, byte count);
    Vector128<long> ShiftRightArithmetic(Vector128<long> value, byte count);

    Vector64<long> ShiftRightArithmeticScalar(Vector64<long> value, byte count);

    /// <summary>
    /// Signed Rounding Shift Right Immediate
    /// For each element result[elem] = (value[elem] + (1 << (shift - 1))) >> shift
    /// Corresponds to vector forms of SRSHR and VRSHR
    /// </summary>
    Vector64<sbyte> ShiftRightArithmeticRounded(Vector64<sbyte> value, byte count);
    Vector64<short> ShiftRightArithmeticRounded(Vector64<short> value, byte count);
    Vector64<int> ShiftRightArithmeticRounded(Vector64<int> value, byte count);
    Vector128<sbyte> ShiftRightArithmeticRounded(Vector128<sbyte> value, byte count);
    Vector128<short> ShiftRightArithmeticRounded(Vector128<short> value, byte count);
    Vector128<int> ShiftRightArithmeticRounded(Vector128<int> value, byte count);
    Vector128<long> ShiftRightArithmeticRounded(Vector128<long> value, byte count);

    Vector64<long> ShiftRightArithmeticRoundedScalar(Vector64<long> value, byte count);

    /// <summary>
    /// Unsigned Shift Right and Accumulate
    /// For each element result[elem] = addend[elem] + (value[elem] >> shift)
    /// Corresponds to vector forms of USRA and VSRA
    /// </summary>
    Vector64<byte> ShiftRightLogicalAdd(Vector64<byte> addend, Vector64<byte> value, byte count);
    Vector64<sbyte> ShiftRightLogicalAdd(Vector64<sbyte> addend, Vector64<sbyte> value, byte count);
    Vector64<short> ShiftRightLogicalAdd(Vector64<short> addend, Vector64<short> value, byte count);
    Vector64<ushort> ShiftRightLogicalAdd(Vector64<ushort> addend, Vector64<ushort> value, byte count);
    Vector64<int> ShiftRightLogicalAdd(Vector64<int> addend, Vector64<int> value, byte count);
    Vector64<uint> ShiftRightLogicalAdd(Vector64<uint> addend, Vector64<uint> value, byte count);
    Vector128<byte> ShiftRightLogicalAdd(Vector128<byte> addend, Vector128<byte> value, byte count);
    Vector128<sbyte> ShiftRightLogicalAdd(Vector128<sbyte> addend, Vector128<sbyte> value, byte count);
    Vector128<short> ShiftRightLogicalAdd(Vector128<short> addend, Vector128<short> value, byte count);
    Vector128<ushort> ShiftRightLogicalAdd(Vector128<ushort> addend, Vector128<ushort> value, byte count);
    Vector128<int> ShiftRightLogicalAdd(Vector128<int> addend, Vector128<int> value, byte count);
    Vector128<uint> ShiftRightLogicalAdd(Vector128<uint> addend, Vector128<uint> value, byte count);
    Vector128<long> ShiftRightLogicalAdd(Vector128<long> addend, Vector128<long> value, byte count);
    Vector128<ulong> ShiftRightLogicalAdd(Vector128<ulong> addend, Vector128<ulong> value, byte count);

    Vector64<long> ShiftRightLogicalAddScalar(Vector64<long> addend, Vector64<long> value, byte count);
    Vector64<ulong> ShiftRightLogicalAddScalar(Vector64<ulong> addend, Vector64<ulong> value, byte count);

    /// <summary>
    /// Signed Shift Right and Accumulate
    /// For each element result[elem] = addend[elem] + (value[elem] >> shift)
    /// Corresponds to vector forms of SSRA and VSRA
    /// </summary>
    Vector64<sbyte> ShiftRightArithmeticAdd(Vector64<sbyte> addend, Vector64<sbyte> value, byte count);
    Vector64<short> ShiftRightArithmeticAdd(Vector64<short> addend, Vector64<short> value, byte count);
    Vector64<int> ShiftRightArithmeticAdd(Vector64<int> addend, Vector64<int> value, byte count);
    Vector128<sbyte> ShiftRightArithmeticAdd(Vector128<sbyte> addend, Vector128<sbyte> value, byte count);
    Vector128<short> ShiftRightArithmeticAdd(Vector128<short> addend, Vector128<short> value, byte count);
    Vector128<int> ShiftRightArithmeticAdd(Vector128<int> addend, Vector128<int> value, byte count);
    Vector128<long> ShiftRightArithmeticAdd(Vector128<long> addend, Vector128<long> value, byte count);

    Vector64<long> ShiftRightArithmeticAddScalar(Vector64<long> addend, Vector64<long> value, byte count);

    /// <summary>
    /// Unsigned Rounding Shift Right and Accumulate
    /// For each element result[elem] = addend[elem] + ((value[elem] + (1 << (count - 1))) >> count)
    /// Corresponds to vector forms of URSRA and VRSRA
    /// </summary>
    Vector64<byte> ShiftRightLogicalAddRounded(Vector64<byte> addend, Vector64<byte> value, byte count);
    Vector64<sbyte> ShiftRightLogicalAddRounded(Vector64<sbyte> addend, Vector64<sbyte> value, byte count);
    Vector64<short> ShiftRightLogicalAddRounded(Vector64<short> addend, Vector64<short> value, byte count);
    Vector64<ushort> ShiftRightLogicalAddRounded(Vector64<ushort> addend, Vector64<ushort> value, byte count);
    Vector64<int> ShiftRightLogicalAddRounded(Vector64<int> addend, Vector64<int> value, byte count);
    Vector64<uint> ShiftRightLogicalAddRounded(Vector64<uint> addend, Vector64<uint> value, byte count);
    Vector128<byte> ShiftRightLogicalAddRounded(Vector128<byte> addend, Vector128<byte> value, byte count);
    Vector128<sbyte> ShiftRightLogicalAddRounded(Vector128<sbyte> addend, Vector128<sbyte> value, byte count);
    Vector128<short> ShiftRightLogicalAddRounded(Vector128<short> addend, Vector128<short> value, byte count);
    Vector128<ushort> ShiftRightLogicalAddRounded(Vector128<ushort> addend, Vector128<ushort> value, byte count);
    Vector128<int> ShiftRightLogicalAddRounded(Vector128<int> addend, Vector128<int> value, byte count);
    Vector128<uint> ShiftRightLogicalAddRounded(Vector128<uint> addend, Vector128<uint> value, byte count);
    Vector128<long> ShiftRightLogicalAddRounded(Vector128<long> addend, Vector128<long> value, byte count);
    Vector128<ulong> ShiftRightLogicalAddRounded(Vector128<ulong> addend, Vector128<ulong> value, byte count);

    Vector64<long> ShiftRightLogicalAddRoundedScalar(Vector64<long> addend, Vector64<long> value, byte count);
    Vector64<ulong> ShiftRightLogicalAddRoundedScalar(Vector64<ulong> addend, Vector64<ulong> value, byte count);

    /// <summary>
    /// Signed Rounding Shift Right and Accumulate
    /// For each element result[elem] = addend[elem] + ((value[elem] + (1 << (count - 1))) >> count)
    /// Corresponds to vector forms of SRSRA and VRSRA
    /// </summary>
    Vector64<sbyte> ShiftRightArithmeticAddRounded(Vector64<sbyte> addend, Vector64<sbyte> value, byte count);
    Vector64<short> ShiftRightArithmeticAddRounded(Vector64<short> addend, Vector64<short> value, byte count);
    Vector64<int> ShiftRightArithmeticAddRounded(Vector64<int> addend, Vector64<int> value, byte count);
    Vector128<sbyte> ShiftRightArithmeticAddRounded(Vector128<sbyte> addend, Vector128<sbyte> value, byte count);
    Vector128<short> ShiftRightArithmeticAddRounded(Vector128<short> addend, Vector128<short> value, byte count);
    Vector128<int> ShiftRightArithmeticAddRounded(Vector128<int> addend, Vector128<int> value, byte count);
    Vector128<long> ShiftRightArithmeticAddRounded(Vector128<long> addend, Vector128<long> value, byte count);

    Vector64<long> ShiftRightArithmeticAddRoundedScalar(Vector64<long> addend, Vector64<long> value, byte count);

    /// <summary>
    /// Signed Saturating Shift Left and Unsigned Saturating Shift Left
    /// For each element result[elem] = value[elem] << shift
    /// Corresponds to vector forms of SQSHL, UQSHL, and VQSHL
    /// </summary>
    Vector64<byte> ShiftLeftLogicalSaturate(Vector64<byte> value, byte count);
    Vector64<sbyte> ShiftLeftLogicalSaturate(Vector64<sbyte> value, byte count);
    Vector64<short> ShiftLeftLogicalSaturate(Vector64<short> value, byte count);
    Vector64<ushort> ShiftLeftLogicalSaturate(Vector64<ushort> value, byte count);
    Vector64<int> ShiftLeftLogicalSaturate(Vector64<int> value, byte count);
    Vector64<uint> ShiftLeftLogicalSaturate(Vector64<uint> value, byte count);
    Vector128<byte> ShiftLeftLogicalSaturate(Vector128<byte> value, byte count);
    Vector128<sbyte> ShiftLeftLogicalSaturate(Vector128<sbyte> value, byte count);
    Vector128<short> ShiftLeftLogicalSaturate(Vector128<short> value, byte count);
    Vector128<ushort> ShiftLeftLogicalSaturate(Vector128<ushort> value, byte count);
    Vector128<int> ShiftLeftLogicalSaturate(Vector128<int> value, byte count);
    Vector128<uint> ShiftLeftLogicalSaturate(Vector128<uint> value, byte count);
    Vector128<long> ShiftLeftLogicalSaturate(Vector128<long> value, byte count);
    Vector128<ulong> ShiftLeftLogicalSaturate(Vector128<ulong> value, byte count);

    Vector64<long> ShiftLeftLogicalSaturateScalar(Vector64<long> value, byte count);
    Vector64<ulong> ShiftLeftLogicalSaturateScalar(Vector64<ulong> value, byte count);

    /// <summary>
    /// Signed Saturating Shift Left Unsigned
    /// For each element result[elem] = value[elem] << shift
    /// Corresponds to vector forms of SQSHLU and VQSHLU
    /// </summary>
    Vector64<byte> ShiftLeftLogicalSaturateUnsigned(Vector64<sbyte> value, byte count);
    Vector64<ushort> ShiftLeftLogicalSaturateUnsigned(Vector64<short> value, byte count);
    Vector64<uint> ShiftLeftLogicalSaturateUnsigned(Vector64<int> value, byte count);
    Vector128<byte> ShiftLeftLogicalSaturateUnsigned(Vector128<sbyte> value, byte count);
    Vector128<ushort> ShiftLeftLogicalSaturateUnsigned(Vector128<short> value, byte count);
    Vector128<uint> ShiftLeftLogicalSaturateUnsigned(Vector128<int> value, byte count);
    Vector128<ulong> ShiftLeftLogicalSaturateUnsigned(Vector128<long> value, byte count);

    Vector64<ulong> ShiftLeftLogicalSaturateUnsignedScalar(Vector64<long> value, byte count);

    /// <summary>
    /// Shift Right Narrow Immediate
    /// For each element result[elem] = value[elem] >> shift
    /// Corresponds to vector forms of SHRN and VSHRN
    /// </summary>
    Vector64<sbyte> ShiftRightLogicalAndNarrowLower(Vector128<short> value, byte count);
    Vector64<byte> ShiftRightLogicalAndNarrowLower(Vector128<ushort> value, byte count);
    Vector64<short> ShiftRightLogicalAndNarrowLower(Vector128<int> value, byte count);
    Vector64<ushort> ShiftRightLogicalAndNarrowLower(Vector128<uint> value, byte count);
    Vector64<int> ShiftRightLogicalAndNarrowLower(Vector128<long> value, byte count);
    Vector64<uint> ShiftRightLogicalAndNarrowLower(Vector128<ulong> value, byte count);

    /// <summary>
    /// Rounding Shift Right Narrow Immediate
    /// For each element result[elem] = (value[elem] + (1 << (shift - 1))) >> shift
    /// Corresponds to vector forms of RSHRN and VRSHRN
    /// </summary>
    Vector64<sbyte> ShiftRightLogicalAndNarrowRoundedLower(Vector128<short> value, byte count);
    Vector64<byte> ShiftRightLogicalAndNarrowRoundedLower(Vector128<ushort> value, byte count);
    Vector64<short> ShiftRightLogicalAndNarrowRoundedLower(Vector128<int> value, byte count);
    Vector64<ushort> ShiftRightLogicalAndNarrowRoundedLower(Vector128<uint> value, byte count);
    Vector64<int> ShiftRightLogicalAndNarrowRoundedLower(Vector128<long> value, byte count);
    Vector64<uint> ShiftRightLogicalAndNarrowRoundedLower(Vector128<ulong> value, byte count);

    /// <summary>
    /// Shift Left Long
    /// For each element result[elem] = value[elem] << shift
    /// Corresponds to vector forms of SHLL and VSHLL
    /// </summary>
    Vector128<short> ShiftLeftLogicalAndWidenLower(Vector64<sbyte> value, byte count);
    Vector128<ushort> ShiftLeftLogicalAndWidenLower(Vector64<byte> value, byte count);
    Vector128<int> ShiftLeftLogicalAndWidenLower(Vector64<short> value, byte count);
    Vector128<uint> ShiftLeftLogicalAndWidenLower(Vector64<ushort> value, byte count);
    Vector128<long> ShiftLeftLogicalAndWidenLower(Vector64<int> value, byte count);
    Vector128<ulong> ShiftLeftLogicalAndWidenLower(Vector64<uint> value, byte count);

    /// <summary>
    /// Unsigned Saturating Shift Right Narrow Immediate
    /// For each element result[elem] = value[elem] >> shift
    /// Corresponds to vector forms of UQSHRN and VQSHRUN
    /// </summary>
    Vector64<sbyte> ShiftRightLogicalAndNarrowSaturateLower(Vector128<short> value, byte count);
    Vector64<byte> ShiftRightLogicalAndNarrowSaturateLower(Vector128<ushort> value, byte count);
    Vector64<short> ShiftRightLogicalAndNarrowSaturateLower(Vector128<int> value, byte count);
    Vector64<ushort> ShiftRightLogicalAndNarrowSaturateLower(Vector128<uint> value, byte count);
    Vector64<int> ShiftRightLogicalAndNarrowSaturateLower(Vector128<long> value, byte count);
    Vector64<uint> ShiftRightLogicalAndNarrowSaturateLower(Vector128<ulong> value, byte count);

    /// <summary>
    /// Unsigned Saturating Rounded Shift Right Narrow Immediate
    /// For each element result[elem] = value[elem] >> shift
    /// Corresponds to vector forms of UQRSHRN and VQRSHRUN
    /// </summary>
    Vector64<sbyte> ShiftRightLogicalAndNarrowRoundedSaturateLower(Vector128<short> value, byte count);
    Vector64<byte> ShiftRightLogicalAndNarrowRoundedSaturateLower(Vector128<ushort> value, byte count);
    Vector64<short> ShiftRightLogicalAndNarrowRoundedSaturateLower(Vector128<int> value, byte count);
    Vector64<ushort> ShiftRightLogicalAndNarrowRoundedSaturateLower(Vector128<uint> value, byte count);
    Vector64<int> ShiftRightLogicalAndNarrowRoundedSaturateLower(Vector128<long> value, byte count);
    Vector64<uint> ShiftRightLogicalAndNarrowRoundedSaturateLower(Vector128<ulong> value, byte count);

    /// <summary>
    /// Signed Saturating Shift Right Narrow Immediate
    /// For each element result[elem] = value[elem] >> shift
    /// Corresponds to vector forms of SQSHRN and VQSHRN
    /// </summary>
    Vector64<sbyte> ShiftRightArithmeticAndNarrowSaturateLower(Vector128<short> value, byte count);
    Vector64<short> ShiftRightArithmeticAndNarrowSaturateLower(Vector128<int> value, byte count);
    Vector64<int> ShiftRightArithmeticAndNarrowSaturateLower(Vector128<long> value, byte count);

    /// <summary>
    /// Signed Saturating Rounded Shift Right Narrow Immediate
    /// For each element result[elem] = value[elem] >> shift
    /// Corresponds to vector forms of SQRSHRN and VQRSHRN
    /// </summary>
    Vector64<sbyte> ShiftRightArithmeticAndNarrowRoundedSaturateLower(Vector128<short> value, byte count);
    Vector64<short> ShiftRightArithmeticAndNarrowRoundedSaturateLower(Vector128<int> value, byte count);
    Vector64<int> ShiftRightArithmeticAndNarrowRoundedSaturateLower(Vector128<long> value, byte count);

    public abstract class Arm64
    {
        /// <summary>
        /// Signed Saturating Rounding Shift Left
        /// For each element result[elem] = (value[elem] + (1 << (-(count[elem] & 0xFF) - 1))) << (count[elem] & 0xFF)
        /// Corresponds to vector forms of SQRSHL
        /// </summary>
        Vector64<sbyte> ShiftArithmeticRoundedSaturateScalar(Vector64<sbyte> value, Vector64<sbyte> count);
        Vector64<short> ShiftArithmeticRoundedSaturateScalar(Vector64<short> value, Vector64<short> count);
        Vector64<int> ShiftArithmeticRoundedSaturateScalar(Vector64<int> value, Vector64<int> count);

        /// <summary>
        /// Signed Saturating Shift Left
        /// For each element result[elem] = value[elem] << (count[elem] & 0xFF)
        /// Corresponds to vector forms of SQSHL
        /// </summary>
        Vector64<sbyte> ShiftArithmeticSaturateScalar(Vector64<sbyte> value, Vector64<sbyte> count);
        Vector64<short> ShiftArithmeticSaturateScalar(Vector64<short> value, Vector64<short> count);
        Vector64<int> ShiftArithmeticSaturateScalar(Vector64<int> value, Vector64<int> count);

        /// <summary>
        /// Signed Rounding Shift Left
        /// For each element result[elem] = (value[elem] + (1 << (-(shift[elem] & 0xFF) - 1))) << (shift[elem] & 0xFF)
        /// Corresponds to vector forms of SRSHL
        /// </summary>
        Vector64<sbyte> ShiftArithmeticRoundedScalar(Vector64<sbyte> value, Vector64<sbyte> count);
        Vector64<short> ShiftArithmeticRoundedScalar(Vector64<short> value, Vector64<short> count);
        Vector64<int> ShiftArithmeticRoundedScalar(Vector64<int> value, Vector64<int> count);

        /// <summary>
        /// Signed Shift Left
        /// For each element result[elem] = value[elem] << (shift[elem] & 0xFF)
        /// Corresponds to vector forms of SSHL
        /// </summary>
        Vector64<sbyte> ShiftArithmeticScalar(Vector64<sbyte> value, Vector64<sbyte> count);
        Vector64<short> ShiftArithmeticScalar(Vector64<short> value, Vector64<short> count);
        Vector64<int> ShiftArithmeticScalar(Vector64<int> value, Vector64<int> count);

        /// <summary>
        /// Unsigned Saturating Rounding Shift Left
        /// For each element result[elem] = (value[elem] + (1 << (-(count[elem] & 0xFF) - 1))) << (count[elem] & 0xFF)
        /// Corresponds to vector forms of UQRSHL and VQRSHL
        /// </summary>
        Vector64<byte> ShiftLogicalRoundedSaturateScalar(Vector64<byte> value, Vector64<sbyte> count);
        Vector64<sbyte> ShiftLogicalRoundedSaturateScalar(Vector64<sbyte> value, Vector64<sbyte> count);
        Vector64<short> ShiftLogicalRoundedSaturateScalar(Vector64<short> value, Vector64<short> count);
        Vector64<ushort> ShiftLogicalRoundedSaturateScalar(Vector64<ushort> value, Vector64<short> count);
        Vector64<int> ShiftLogicalRoundedSaturateScalar(Vector64<int> value, Vector64<int> count);
        Vector64<uint> ShiftLogicalRoundedSaturateScalar(Vector64<uint> value, Vector64<int> count);

        // NOTE(review): the formula shows a plain shift with no saturation step, although the
        // title says "Saturating" - confirm against the UQSHL description.
        /// <summary>
        /// Unsigned Saturating Shift Left
        /// For each element result[elem] = value[elem] << (count[elem] & 0xFF)
        /// Corresponds to vector forms of UQSHL and VQSHL
        /// </summary>
        /// <param name="value">Elements to shift.</param>
        /// <param name="count">Per-element shift amounts; the formula uses only the low 8 bits of each (<c>&amp; 0xFF</c>).</param>
        Vector64<byte> ShiftLogicalSaturateScalar(Vector64<byte> value, Vector64<sbyte> count);
        Vector64<sbyte> ShiftLogicalSaturateScalar(Vector64<sbyte> value, Vector64<sbyte> count);
        Vector64<short> ShiftLogicalSaturateScalar(Vector64<short> value, Vector64<short> count);
        Vector64<ushort> ShiftLogicalSaturateScalar(Vector64<ushort> value, Vector64<short> count);
        Vector64<int> ShiftLogicalSaturateScalar(Vector64<int> value, Vector64<int> count);
        Vector64<uint> ShiftLogicalSaturateScalar(Vector64<uint> value, Vector64<int> count);

        // NOTE(review): the rounding term negates the count, so it presumably only matters for
        // negative counts (i.e. right shifts) - confirm against the URSHL description.
        /// <summary>
        /// Unsigned Rounding Shift Left
        /// For each element result[elem] = (value[elem] + (1 << (-(count[elem] & 0xFF) - 1))) << (count[elem] & 0xFF)
        /// Corresponds to vector forms of URSHL and VRSHL
        /// </summary>
        /// <param name="value">Elements to shift.</param>
        /// <param name="count">Per-element shift amounts; the formula uses only the low 8 bits of each (<c>&amp; 0xFF</c>).</param>
        Vector64<byte> ShiftLogicalRoundedScalar(Vector64<byte> value, Vector64<sbyte> count);
        Vector64<sbyte> ShiftLogicalRoundedScalar(Vector64<sbyte> value, Vector64<sbyte> count);
        Vector64<short> ShiftLogicalRoundedScalar(Vector64<short> value, Vector64<short> count);
        Vector64<ushort> ShiftLogicalRoundedScalar(Vector64<ushort> value, Vector64<short> count);
        Vector64<int> ShiftLogicalRoundedScalar(Vector64<int> value, Vector64<int> count);
        Vector64<uint> ShiftLogicalRoundedScalar(Vector64<uint> value, Vector64<int> count);


        // NOTE(review): the overloads are named *Scalar but the summary says "vector forms" - confirm.
        /// <summary>
        /// Unsigned Shift Left
        /// For each element result[elem] = value[elem] << (count[elem] & 0xFF)
        /// Corresponds to vector forms of USHL and VSHL
        /// </summary>
        /// <param name="value">Elements to shift.</param>
        /// <param name="count">Per-element shift amounts; the formula uses only the low 8 bits of each (<c>&amp; 0xFF</c>).</param>
        Vector64<byte> ShiftLogicalScalar(Vector64<byte> value, Vector64<sbyte> count);
        Vector64<sbyte> ShiftLogicalScalar(Vector64<sbyte> value, Vector64<sbyte> count);
        Vector64<short> ShiftLogicalScalar(Vector64<short> value, Vector64<short> count);
        Vector64<ushort> ShiftLogicalScalar(Vector64<ushort> value, Vector64<short> count);
        Vector64<int> ShiftLogicalScalar(Vector64<int> value, Vector64<int> count);
        Vector64<uint> ShiftLogicalScalar(Vector64<uint> value, Vector64<int> count);

        // NOTE(review): Arm describes SXTL as the zero-shift alias of SSHLL; confirm whether a
        // `count` parameter (and the shift in the formula) belongs on this API.
        /// <summary>
        /// Signed Extend Long
        /// For each element result[elem] = value[elem] << count
        /// Corresponds to vector forms of SXTL
        /// </summary>
        /// <param name="value">Elements to widen; each result element is twice as wide as its source element.</param>
        /// <param name="count">Shift amount applied to every element, per the formula above.</param>
        Vector128<short> SignExtendAndWidenLower(Vector64<sbyte> value, byte count);
        Vector128<int> SignExtendAndWidenLower(Vector64<short> value, byte count);
        Vector128<long> SignExtendAndWidenLower(Vector64<int> value, byte count);

        // NOTE(review): Arm describes UXTL as the zero-shift alias of USHLL; confirm whether a
        // `count` parameter (and the shift in the formula) belongs on this API.
        /// <summary>
        /// Unsigned Extend Long
        /// For each element result[elem] = value[elem] << count
        /// Corresponds to vector forms of UXTL
        /// </summary>
        /// <param name="value">Elements to widen; each result element is twice as wide as its source element.</param>
        /// <param name="count">Shift amount applied to every element, per the formula above.</param>
        Vector128<short> ZeroExtendAndWidenLower(Vector64<sbyte> value, byte count);
        Vector128<ushort> ZeroExtendAndWidenLower(Vector64<byte> value, byte count);
        Vector128<int> ZeroExtendAndWidenLower(Vector64<short> value, byte count);
        Vector128<uint> ZeroExtendAndWidenLower(Vector64<ushort> value, byte count);
        Vector128<long> ZeroExtendAndWidenLower(Vector64<int> value, byte count);
        Vector128<ulong> ZeroExtendAndWidenLower(Vector64<uint> value, byte count);

        // NOTE(review): the formula shows a plain right shift with no saturation step, although
        // the title says "Saturating" - confirm against the SQSHRUN description.
        /// <summary>
        /// Signed Saturating Shift Right Unsigned Narrow Immediate
        /// For each element result[elem] = value[elem] >> count
        /// Corresponds to vector forms of SQSHRUN
        /// </summary>
        /// <param name="value">Signed elements to shift; each result element is unsigned and half as wide.</param>
        /// <param name="count">Immediate shift amount applied to every element.</param>
        Vector64<byte> ShiftRightArithmeticUnsignedAndNarrowSaturateLower(Vector128<short> value, byte count);
        Vector64<ushort> ShiftRightArithmeticUnsignedAndNarrowSaturateLower(Vector128<int> value, byte count);
        Vector64<uint> ShiftRightArithmeticUnsignedAndNarrowSaturateLower(Vector128<long> value, byte count);

        // NOTE(review): the title says "Rounded" and "Saturating", but the formula shows neither a
        // rounding term nor a saturation step - confirm against the SQRSHRUN description.
        /// <summary>
        /// Signed Saturating Rounded Shift Right Unsigned Narrow Immediate
        /// For each element result[elem] = value[elem] >> count
        /// Corresponds to vector forms of SQRSHRUN
        /// </summary>
        /// <param name="value">Signed elements to shift; each result element is unsigned and half as wide.</param>
        /// <param name="count">Immediate shift amount applied to every element.</param>
        Vector64<byte> ShiftRightArithmeticUnsignedAndNarrowRoundedSaturateLower(Vector128<short> value, byte count);
        Vector64<ushort> ShiftRightArithmeticUnsignedAndNarrowRoundedSaturateLower(Vector128<int> value, byte count);
        Vector64<uint> ShiftRightArithmeticUnsignedAndNarrowRoundedSaturateLower(Vector128<long> value, byte count);

        /// <summary>
        /// Shift Right Narrow Immediate
        /// For each element result[elem] = value[elem] >> count
        /// Corresponds to vector forms of SHRN2
        /// </summary>
        /// <param name="lower">Presumably supplies the lower half of the result, with the narrowed elements placed in the upper half - TODO confirm.</param>
        /// <param name="value">Elements to shift; each is twice as wide as a result element.</param>
        /// <param name="count">Immediate shift amount applied to every element.</param>
        Vector128<sbyte> ShiftRightLogicalAndNarrowUpper(Vector64<sbyte> lower, Vector128<short> value, byte count);
        Vector128<byte> ShiftRightLogicalAndNarrowUpper(Vector64<byte> lower, Vector128<ushort> value, byte count);
        Vector128<short> ShiftRightLogicalAndNarrowUpper(Vector64<short> lower, Vector128<int> value, byte count);
        Vector128<ushort> ShiftRightLogicalAndNarrowUpper(Vector64<ushort> lower, Vector128<uint> value, byte count);
        Vector128<int> ShiftRightLogicalAndNarrowUpper(Vector64<int> lower, Vector128<long> value, byte count);
        Vector128<uint> ShiftRightLogicalAndNarrowUpper(Vector64<uint> lower, Vector128<ulong> value, byte count);

        /// <summary>
        /// Rounding Shift Right Narrow Immediate
        /// For each element result[elem] = (value[elem] + (1 << (count - 1))) >> count
        /// Corresponds to vector forms of RSHRN2
        /// </summary>
        /// <param name="lower">Presumably supplies the lower half of the result, with the narrowed elements placed in the upper half - TODO confirm.</param>
        /// <param name="value">Elements to shift; each is twice as wide as a result element.</param>
        /// <param name="count">Immediate shift amount applied to every element; also sets the rounding term <c>1 &lt;&lt; (count - 1)</c>.</param>
        Vector128<sbyte> ShiftRightLogicalAndNarrowRoundedUpper(Vector64<sbyte> lower, Vector128<short> value, byte count);
        Vector128<byte> ShiftRightLogicalAndNarrowRoundedUpper(Vector64<byte> lower, Vector128<ushort> value, byte count);
        Vector128<short> ShiftRightLogicalAndNarrowRoundedUpper(Vector64<short> lower, Vector128<int> value, byte count);
        Vector128<ushort> ShiftRightLogicalAndNarrowRoundedUpper(Vector64<ushort> lower, Vector128<uint> value, byte count);
        Vector128<int> ShiftRightLogicalAndNarrowRoundedUpper(Vector64<int> lower, Vector128<long> value, byte count);
        Vector128<uint> ShiftRightLogicalAndNarrowRoundedUpper(Vector64<uint> lower, Vector128<ulong> value, byte count);

        // NOTE(review): Arm describes SHLL/SHLL2 as shifting left by the source element size;
        // confirm how an arbitrary `count` is meant to map onto that instruction.
        /// <summary>
        /// Shift Left Long
        /// For each element result[elem] = value[elem] << count
        /// Corresponds to vector forms of SHLL2
        /// </summary>
        /// <param name="value">Source vector; each result element is twice as wide as a source element. Presumably only the upper half is consumed - TODO confirm.</param>
        /// <param name="count">Shift amount applied to every element, per the formula above.</param>
        Vector128<short> ShiftLeftLogicalAndWidenUpper(Vector128<sbyte> value, byte count);
        Vector128<ushort> ShiftLeftLogicalAndWidenUpper(Vector128<byte> value, byte count);
        Vector128<int> ShiftLeftLogicalAndWidenUpper(Vector128<short> value, byte count);
        Vector128<uint> ShiftLeftLogicalAndWidenUpper(Vector128<ushort> value, byte count);
        Vector128<long> ShiftLeftLogicalAndWidenUpper(Vector128<int> value, byte count);
        Vector128<ulong> ShiftLeftLogicalAndWidenUpper(Vector128<uint> value, byte count);

        // NOTE(review): Arm describes SXTL2 as the zero-shift alias of SSHLL2; confirm whether a
        // `count` parameter (and the shift in the formula) belongs on this API.
        /// <summary>
        /// Signed Extend Long
        /// For each element result[elem] = value[elem] << count
        /// Corresponds to vector forms of SXTL2
        /// </summary>
        /// <param name="value">Source vector; each result element is twice as wide as a source element. Presumably only the upper half is consumed - TODO confirm.</param>
        /// <param name="count">Shift amount applied to every element, per the formula above.</param>
        Vector128<short> SignExtendAndWidenUpper(Vector128<sbyte> value, byte count);
        Vector128<int> SignExtendAndWidenUpper(Vector128<short> value, byte count);
        Vector128<long> SignExtendAndWidenUpper(Vector128<int> value, byte count);

        // NOTE(review): Arm describes UXTL2 as the zero-shift alias of USHLL2; confirm whether a
        // `count` parameter (and the shift in the formula) belongs on this API.
        /// <summary>
        /// Unsigned Extend Long
        /// For each element result[elem] = value[elem] << count
        /// Corresponds to vector forms of UXTL2
        /// </summary>
        /// <param name="value">Source vector; each result element is twice as wide as a source element. Presumably only the upper half is consumed - TODO confirm.</param>
        /// <param name="count">Shift amount applied to every element, per the formula above.</param>
        Vector128<short> ZeroExtendAndWidenUpper(Vector128<sbyte> value, byte count);
        Vector128<ushort> ZeroExtendAndWidenUpper(Vector128<byte> value, byte count);
        Vector128<int> ZeroExtendAndWidenUpper(Vector128<short> value, byte count);
        Vector128<uint> ZeroExtendAndWidenUpper(Vector128<ushort> value, byte count);
        Vector128<long> ZeroExtendAndWidenUpper(Vector128<int> value, byte count);
        Vector128<ulong> ZeroExtendAndWidenUpper(Vector128<uint> value, byte count);

        // NOTE(review): the formula shows a plain right shift with no saturation step, although
        // the title says "Saturating" - confirm against the UQSHRN2 description.
        /// <summary>
        /// Unsigned Saturating Shift Right Narrow Immediate
        /// For each element result[elem] = value[elem] >> count
        /// Corresponds to vector forms of UQSHRN2
        /// </summary>
        /// <param name="lower">Presumably supplies the lower half of the result, with the narrowed elements placed in the upper half - TODO confirm.</param>
        /// <param name="value">Elements to shift; each is twice as wide as a result element.</param>
        /// <param name="count">Immediate shift amount applied to every element.</param>
        Vector128<sbyte> ShiftRightLogicalAndNarrowSaturateUpper(Vector64<sbyte> lower, Vector128<short> value, byte count);
        Vector128<byte> ShiftRightLogicalAndNarrowSaturateUpper(Vector64<byte> lower, Vector128<ushort> value, byte count);
        Vector128<short> ShiftRightLogicalAndNarrowSaturateUpper(Vector64<short> lower, Vector128<int> value, byte count);
        Vector128<ushort> ShiftRightLogicalAndNarrowSaturateUpper(Vector64<ushort> lower, Vector128<uint> value, byte count);
        Vector128<int> ShiftRightLogicalAndNarrowSaturateUpper(Vector64<int> lower, Vector128<long> value, byte count);
        Vector128<uint> ShiftRightLogicalAndNarrowSaturateUpper(Vector64<uint> lower, Vector128<ulong> value, byte count);

        // NOTE(review): the title says "Rounded" and "Saturating", but the formula shows neither a
        // rounding term nor a saturation step - confirm against the UQRSHRN2 description.
        /// <summary>
        /// Unsigned Saturating Rounded Shift Right Narrow Immediate
        /// For each element result[elem] = value[elem] >> count
        /// Corresponds to vector forms of UQRSHRN2
        /// </summary>
        /// <param name="lower">Presumably supplies the lower half of the result, with the narrowed elements placed in the upper half - TODO confirm.</param>
        /// <param name="value">Elements to shift; each is twice as wide as a result element.</param>
        /// <param name="count">Immediate shift amount applied to every element.</param>
        Vector128<sbyte> ShiftRightLogicalAndNarrowRoundedSaturateUpper(Vector64<sbyte> lower, Vector128<short> value, byte count);
        Vector128<byte> ShiftRightLogicalAndNarrowRoundedSaturateUpper(Vector64<byte> lower, Vector128<ushort> value, byte count);
        Vector128<short> ShiftRightLogicalAndNarrowRoundedSaturateUpper(Vector64<short> lower, Vector128<int> value, byte count);
        Vector128<ushort> ShiftRightLogicalAndNarrowRoundedSaturateUpper(Vector64<ushort> lower, Vector128<uint> value, byte count);
        Vector128<int> ShiftRightLogicalAndNarrowRoundedSaturateUpper(Vector64<int> lower, Vector128<long> value, byte count);
        Vector128<uint> ShiftRightLogicalAndNarrowRoundedSaturateUpper(Vector64<uint> lower, Vector128<ulong> value, byte count);

        // NOTE(review): the formula shows a plain right shift with no saturation step, although
        // the title says "Saturating" - confirm against the SQSHRN2 description.
        /// <summary>
        /// Signed Saturating Shift Right Narrow Immediate
        /// For each element result[elem] = value[elem] >> count
        /// Corresponds to vector forms of SQSHRN2
        /// </summary>
        /// <param name="lower">Presumably supplies the lower half of the result, with the narrowed elements placed in the upper half - TODO confirm.</param>
        /// <param name="value">Signed elements to shift; each is twice as wide as a result element.</param>
        /// <param name="count">Immediate shift amount applied to every element.</param>
        Vector128<sbyte> ShiftRightArithmeticAndNarrowSaturateUpper(Vector64<sbyte> lower, Vector128<short> value, byte count);
        Vector128<short> ShiftRightArithmeticAndNarrowSaturateUpper(Vector64<short> lower, Vector128<int> value, byte count);
        Vector128<int> ShiftRightArithmeticAndNarrowSaturateUpper(Vector64<int> lower, Vector128<long> value, byte count);

        // NOTE(review): the title says "Rounded" and "Saturating", but the formula shows neither a
        // rounding term nor a saturation step - confirm against the SQRSHRN2 description.
        /// <summary>
        /// Signed Saturating Rounded Shift Right Narrow Immediate
        /// For each element result[elem] = value[elem] >> count
        /// Corresponds to vector forms of SQRSHRN2
        /// </summary>
        /// <param name="lower">Presumably supplies the lower half of the result, with the narrowed elements placed in the upper half - TODO confirm.</param>
        /// <param name="value">Signed elements to shift; each is twice as wide as a result element.</param>
        /// <param name="count">Immediate shift amount applied to every element.</param>
        Vector128<sbyte> ShiftRightArithmeticAndNarrowRoundedSaturateUpper(Vector64<sbyte> lower, Vector128<short> value, byte count);
        Vector128<short> ShiftRightArithmeticAndNarrowRoundedSaturateUpper(Vector64<short> lower, Vector128<int> value, byte count);
        Vector128<int> ShiftRightArithmeticAndNarrowRoundedSaturateUpper(Vector64<int> lower, Vector128<long> value, byte count);

        // NOTE(review): the formula shows a plain right shift with no saturation step, although
        // the title says "Saturating" - confirm against the SQSHRUN2 description.
        // NOTE(review): `lower` is a signed vector while the return type is unsigned - confirm
        // that this signedness mismatch is intended.
        /// <summary>
        /// Signed Saturating Shift Right Unsigned Narrow Immediate
        /// For each element result[elem] = value[elem] >> count
        /// Corresponds to vector forms of SQSHRUN2
        /// </summary>
        /// <param name="lower">Presumably supplies the lower half of the result, with the narrowed elements placed in the upper half - TODO confirm.</param>
        /// <param name="value">Signed elements to shift; each is twice as wide as a result element.</param>
        /// <param name="count">Immediate shift amount applied to every element.</param>
        Vector128<byte> ShiftRightArithmeticUnsignedAndNarrowSaturateUpper(Vector64<sbyte> lower, Vector128<short> value, byte count);
        Vector128<ushort> ShiftRightArithmeticUnsignedAndNarrowSaturateUpper(Vector64<short> lower, Vector128<int> value, byte count);
        Vector128<uint> ShiftRightArithmeticUnsignedAndNarrowSaturateUpper(Vector64<int> lower, Vector128<long> value, byte count);

        // NOTE(review): the title says "Rounded" and "Saturating", but the formula shows neither a
        // rounding term nor a saturation step - confirm against the SQRSHRUN2 description.
        // NOTE(review): `lower` is a signed vector while the return type is unsigned - confirm
        // that this signedness mismatch is intended.
        /// <summary>
        /// Signed Saturating Rounded Shift Right Unsigned Narrow Immediate
        /// For each element result[elem] = value[elem] >> count
        /// Corresponds to vector forms of SQRSHRUN2
        /// </summary>
        /// <param name="lower">Presumably supplies the lower half of the result, with the narrowed elements placed in the upper half - TODO confirm.</param>
        /// <param name="value">Signed elements to shift; each is twice as wide as a result element.</param>
        /// <param name="count">Immediate shift amount applied to every element.</param>
        Vector128<byte> ShiftRightArithmeticUnsignedAndNarrowRoundedSaturateUpper(Vector64<sbyte> lower, Vector128<short> value, byte count);
        Vector128<ushort> ShiftRightArithmeticUnsignedAndNarrowRoundedSaturateUpper(Vector64<short> lower, Vector128<int> value, byte count);
        Vector128<uint> ShiftRightArithmeticUnsignedAndNarrowRoundedSaturateUpper(Vector64<int> lower, Vector128<long> value, byte count);
    }
}

}
```

Since we used Widening in #32512, I am going to use the same naming scheme here instead of AndWiden.

cc @tannergooding

I am asking for trouble here :) but would it make sense to rename the AndNarrow methods to Narrowing?

Namely,

AddHighNarrowLower -> AddHighNarrowingLower
AddHighNarrowUpper -> AddHighNarrowingUpper

ExtractAndNarrowHigh -> ExtractNarrowingUpper
ExtractAndNarrowLow -> ExtractNarrowingLower

ShiftRightLogicalAndNarrowLower -> ShiftRightLogicalNarrowingLower
ShiftRightLogicalAndNarrowUpper -> ShiftRightLogicalNarrowingUpper

ShiftRightLogicalRoundedAndNarrowLower -> ShiftRightLogicalRoundedNarrowingLower
ShiftRightLogicalRoundedAndNarrowUpper -> ShiftRightLogicalRoundedNarrowingUpper

Could you write up a small proposal and send it to the internal e-mail alias for @dotnet/fxdc? I think the adjustment here is small enough that we can come to an agreement over e-mail (and it makes things consistent with how we used Widening).

Could you write up a small proposal and send it to the internal e-mail alias for @dotnet/fxdc? I think the adjustment here is small enough that we can come to an agreement over e-mail (and it makes things consistent with how we used Widening).

@tannergooding I will

Was this page helpful?
0 / 5 - 0 ratings

Related issues

bencz picture bencz  ·  3 Comments

Timovzl picture Timovzl  ·  3 Comments

noahfalk picture noahfalk  ·  3 Comments

jkotas picture jkotas  ·  3 Comments

v0l picture v0l  ·  3 Comments