@eerhardt @CarolEidt @RussKeldorph
Target Framework netcoreapp2.1
```C#
namespace System.Runtime.Intrinsics.Arm.Arm64
{
/// <summary>
/// This class provides access to the Arm64 AdvSIMD intrinsics.
///
/// Arm64 CPUs indicate support for this feature by setting
/// ID_AA64PFR0_EL1.AdvSIMD == 0 or better.
/// </summary>
public static class Simd
{
/// <summary>
/// Gets a value indicating whether the Arm64 AdvSIMD intrinsics are supported
/// by the current hardware. Callers must guard all other members with this check.
/// </summary>
public static bool IsSupported { get { throw null; } }
/// <summary>
/// Vector abs
/// Corresponds to vector forms of ARM64 ABS &amp; FABS
/// Note: for signed integer element types the result element type is the
/// unsigned counterpart (e.g. sbyte in, byte out), since |x| never needs a sign bit.
/// </summary>
public static Vector64<byte> Abs(Vector64<sbyte> value) { throw null; }
public static Vector64<ushort> Abs(Vector64<short> value) { throw null; }
public static Vector64<uint> Abs(Vector64<int> value) { throw null; }
public static Vector64<float> Abs(Vector64<float> value) { throw null; }
public static Vector128<byte> Abs(Vector128<sbyte> value) { throw null; }
public static Vector128<ushort> Abs(Vector128<short> value) { throw null; }
public static Vector128<uint> Abs(Vector128<int> value) { throw null; }
public static Vector128<ulong> Abs(Vector128<long> value) { throw null; }
public static Vector128<float> Abs(Vector128<float> value) { throw null; }
public static Vector128<double> Abs(Vector128<double> value) { throw null; }
/// <summary>
/// Vector add
/// Corresponds to vector forms of ARM64 ADD &amp; FADD
/// </summary>
public static Vector64<T> Add<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw null; }
public static Vector128<T> Add<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw null; }
/// <summary>
/// Vector and
/// Corresponds to vector forms of ARM64 AND
/// </summary>
public static Vector64<T> And<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw null; }
public static Vector128<T> And<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw null; }
/// <summary>
/// Vector and not
/// Corresponds to vector forms of ARM64 BIC
/// Computes left &amp; ~right — note this operand order is the opposite of the
/// x86 AndNot intrinsic (ANDNPS), which computes ~left &amp; right.
/// </summary>
public static Vector64<T> AndNot<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw null; }
public static Vector128<T> AndNot<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw null; }
/// <summary>
/// Vector Divide
/// Corresponds to vector forms of ARM64 FDIV
/// Floating-point only: ARM64 has no vector integer divide instruction.
/// </summary>
public static Vector64<float> Divide(Vector64<float> left, Vector64<float> right) { throw null; }
public static Vector128<float> Divide(Vector128<float> left, Vector128<float> right) { throw null; }
public static Vector128<double> Divide(Vector128<double> left, Vector128<double> right) { throw null; }
/// <summary>
/// Vector max
/// Corresponds to vector forms of ARM64 SMAX, UMAX &amp; FMAX
/// </summary>
public static Vector64<T> Max<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw null; }
public static Vector128<T> Max<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw null; }
/// <summary>
/// Vector min
/// Corresponds to vector forms of ARM64 SMIN, UMIN &amp; FMIN
/// </summary>
public static Vector64<T> Min<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw null; }
public static Vector128<T> Min<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw null; }
/// <summary>
/// Vector multiply
///
/// For each element result[elem] = left[elem] * right[elem]
///
/// Corresponds to vector forms of ARM64 MUL &amp; FMUL
/// Note: no long/ulong overloads — MUL has no 64-bit integer element form.
/// </summary>
public static Vector64<byte> Multiply(Vector64<byte> left, Vector64<byte> right) { throw null; }
public static Vector64<sbyte> Multiply(Vector64<sbyte> left, Vector64<sbyte> right) { throw null; }
public static Vector64<ushort> Multiply(Vector64<ushort> left, Vector64<ushort> right) { throw null; }
public static Vector64<short> Multiply(Vector64<short> left, Vector64<short> right) { throw null; }
public static Vector64<uint> Multiply(Vector64<uint> left, Vector64<uint> right) { throw null; }
public static Vector64<int> Multiply(Vector64<int> left, Vector64<int> right) { throw null; }
public static Vector64<float> Multiply(Vector64<float> left, Vector64<float> right) { throw null; }
public static Vector128<byte> Multiply(Vector128<byte> left, Vector128<byte> right) { throw null; }
public static Vector128<sbyte> Multiply(Vector128<sbyte> left, Vector128<sbyte> right) { throw null; }
public static Vector128<ushort> Multiply(Vector128<ushort> left, Vector128<ushort> right) { throw null; }
public static Vector128<short> Multiply(Vector128<short> left, Vector128<short> right) { throw null; }
public static Vector128<uint> Multiply(Vector128<uint> left, Vector128<uint> right) { throw null; }
public static Vector128<int> Multiply(Vector128<int> left, Vector128<int> right) { throw null; }
public static Vector128<float> Multiply(Vector128<float> left, Vector128<float> right) { throw null; }
public static Vector128<double> Multiply(Vector128<double> left, Vector128<double> right) { throw null; }
/// <summary>
/// Vector negate
/// Corresponds to vector forms of ARM64 NEG &amp; FNEG
/// Signed and floating-point element types only; negation of unsigned
/// elements is not meaningful.
/// </summary>
public static Vector64<sbyte> Negate(Vector64<sbyte> value) { throw null; }
public static Vector64<short> Negate(Vector64<short> value) { throw null; }
public static Vector64<int> Negate(Vector64<int> value) { throw null; }
public static Vector64<float> Negate(Vector64<float> value) { throw null; }
public static Vector128<sbyte> Negate(Vector128<sbyte> value) { throw null; }
public static Vector128<short> Negate(Vector128<short> value) { throw null; }
public static Vector128<int> Negate(Vector128<int> value) { throw null; }
public static Vector128<long> Negate(Vector128<long> value) { throw null; }
public static Vector128<float> Negate(Vector128<float> value) { throw null; }
public static Vector128<double> Negate(Vector128<double> value) { throw null; }
/// <summary>
/// Vector not
/// Corresponds to vector forms of ARM64 NOT
/// </summary>
public static Vector64<T> Not<T>(Vector64<T> value) where T : struct { throw null; }
public static Vector128<T> Not<T>(Vector128<T> value) where T : struct { throw null; }
/// <summary>
/// Vector or
/// Corresponds to vector forms of ARM64 ORR
/// </summary>
public static Vector64<T> Or<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw null; }
public static Vector128<T> Or<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw null; }
/// <summary>
/// Vector or not
/// Corresponds to vector forms of ARM64 ORN
/// Computes left | ~right.
/// </summary>
public static Vector64<T> OrNot<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw null; }
public static Vector128<T> OrNot<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw null; }
/// <summary>
/// Vector square root
/// Corresponds to vector forms of ARM64 FSQRT
/// (FRSQRTE is the reciprocal square-root *estimate*; this intrinsic is the
/// exact square root.) Floating-point element types only.
/// </summary>
public static Vector64<float> Sqrt(Vector64<float> value) { throw null; }
public static Vector128<float> Sqrt(Vector128<float> value) { throw null; }
public static Vector128<double> Sqrt(Vector128<double> value) { throw null; }
/// <summary>
/// Vector subtract
/// Corresponds to vector forms of ARM64 SUB &amp; FSUB
/// </summary>
public static Vector64<T> Subtract<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw null; }
public static Vector128<T> Subtract<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw null; }
/// <summary>
/// Vector exclusive or
/// Corresponds to vector forms of ARM64 EOR
/// </summary>
public static Vector64<T> Xor<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw null; }
public static Vector128<T> Xor<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw null; }
}
}
```
The Intel intrinsics tend to use generics for similar methods.
The Intel intrinsics tend to use generics for similar methods.
That is not what I saw in https://github.com/dotnet/coreclr/blob/master/src/mscorlib/src/System/Runtime/Intrinsics/X86/Sse.cs#L18-L41
I should have said similar patterns - if there are overloads for all primitive types, the Intel intrinsics just use generic.
For the Vector64<> methods usually all primitive type except if the size is such that it would not be a vector. Vector64<double>, Vector64<ulong>, and Vector64<long> are essentially nonsensical. I think we should use generics even in this case.
Comments?
cc @4creators
I think we should use generics even in this case.
I agree.
OK, I have updated the proposal to use generics. Will do the same in the other proposals.
Abs conceptually doesn't make sense for unsigned types, and currently the return type does not match the argument type. Multiply doesn't support long/ulong vectors. Negate conceptually doesn't make sense for unsigned primitives (but they could be treated as signed). Divide and Sqrt only support floating-point types. Intel HW intrinsics use a pattern like the following in Sse2 (the missing float overload is implemented in Sse):
```C#
/// <summary>
/// __m128i _mm_and_si128 (__m128i a, __m128i b)
/// </summary>
public static Vector128<byte> And(Vector128<byte> left, Vector128<byte> right) => And(left, right);
/// <summary>
/// __m128i _mm_and_si128 (__m128i a, __m128i b)
/// </summary>
public static Vector128<sbyte> And(Vector128<sbyte> left, Vector128<sbyte> right) => And(left, right);
/// <summary>
/// __m128i _mm_and_si128 (__m128i a, __m128i b)
/// </summary>
public static Vector128<short> And(Vector128<short> left, Vector128<short> right) => And(left, right);
/// <summary>
/// __m128i _mm_and_si128 (__m128i a, __m128i b)
/// </summary>
public static Vector128<ushort> And(Vector128<ushort> left, Vector128<ushort> right) => And(left, right);
/// <summary>
/// __m128i _mm_and_si128 (__m128i a, __m128i b)
/// </summary>
public static Vector128<int> And(Vector128<int> left, Vector128<int> right) => And(left, right);
/// <summary>
/// __m128i _mm_and_si128 (__m128i a, __m128i b)
/// </summary>
public static Vector128<uint> And(Vector128<uint> left, Vector128<uint> right) => And(left, right);
/// <summary>
/// __m128i _mm_and_si128 (__m128i a, __m128i b)
/// </summary>
public static Vector128<long> And(Vector128<long> left, Vector128<long> right) => And(left, right);
/// <summary>
/// __m128i _mm_and_si128 (__m128i a, __m128i b)
/// </summary>
public static Vector128<ulong> And(Vector128<ulong> left, Vector128<ulong> right) => And(left, right);
/// <summary>
/// __m128d _mm_and_pd (__m128d a, __m128d b)
/// </summary>
public static Vector128<double> And(Vector128<double> left, Vector128<double> right) => And(left, right);
```
If we use pattern like:
```C#
public static Vector128<T> And<T>(Vector128<T> left, Vector128<T> right) where T : struct => And(left, right);
```
then we have to artificially create generic constraints due to C# syntax limitations, which will limit the range of supported generic type parameters to supported types that are a subset of the struct constraint. This is currently done for Intel intrinsics in roughly 10 - 20 remaining functions declared without specializations (after dotnet/corefx#15601 is finalized and merged) as follows:
```C#
/// <summary>
/// __m256d _mm256_castpd128_pd256 (__m128d a)
/// __m256 _mm256_castps128_ps256 (__m128 a)
/// __m256i _mm256_castsi128_si256 (__m128i a)
/// </summary>
public static Vector256<T> ExtendToVector256<T>(Vector128<T> value) where T : struct
{
    ThrowHelper.ThrowNotSupportedExceptionIfNonNumericType<T>();
    return ExtendToVector256<T>(value);
}
```
This leads to breaking the guarantee of compile-time type safety for type-safe languages (C#, VB, F#), which from my perspective is hardly acceptable and is a grave error. I would prefer to write a few more lines of code providing overloads for every acceptable generic type parameter (it is not that hard) to uphold the compile-time type safety guarantee expected from all type-safe languages: if a program compiles without type errors, it should never throw type-usage-related exceptions at runtime. Otherwise we will land in a JavaScript-like situation, at least for parts of the available libraries, and cannot say that .NET Core is type safe.
This case ideally should be guarded by improved generics syntax (what is difficult to achieve in reasonably short time or at all), less ideally but workable by analyzer and surely not by runtime exceptions.
@4creators,
it is not that hard
Certainly not. The original draft had it that way. @jkotas Asked me to change it to be consistent with Intel.
The decision whether HW Intrinsic should use generics is really outside the scope of this discussion. @CarolEidt plans to update the https://github.com/dotnet/designs/blob/master/accepted/platform-intrinsics.md to handle these types of questions.
I certainly enjoy the type safety and compile time errors given by public static Vector128<double> And(Vector128<double> left, Vector128<double> right).
However, I also don't want to overload the API surface/etc with something that can be trivially handled by an analyzer (which we already know is going to be recommended for other places where enforcing preconditions at compile time is hard or impossible).
That being said, whichever route we go, I would want to see consistent rules (and for those to exist for Arm, x86, and any future architectures or ISAs added).
A good rule might just be always use generics for these APIs. The ISAs (such as SSE) which only support a single type (and will not be changing in the future, because they are locked at this point), can be explicit (Vector128<float> Add), but otherwise (for multiple overloads) we should always have a single API which is generic (Vector128<T> Add()). At the very least, such as rule is clean, consistent, and there won't be any discussions or objections about: "well, the rule was 2, but I had three, and its only one more"
This leads to breaking the guarantee of compile type safety for type safe languages
This does not break type safety. Breaking type safety means allowing a type A to be incorrectly referenced as type B (or similar). Admittedly this is a less than ideal solution, but I gave up arguing for spec changes to provide the appropriate constraints. The explosion of API signatures is, IMO, a non-trivial consideration, and the consensus is to use generics "where it makes sense" (I'm paraphrasing the discussion here).
However, we still need to clarify what it means for it to make sense. In the case where a single type is supported by a given target ISA (e.g. the Vector128<float> Add case mentioned above), it makes sense to "explode" the base types. We probably can't formulate hard and fast rules, but some guidelines can probably be established.
Admittedly this is a less than ideal solution, but I gave up arguing for spec changes to provide the appropriate constraints.
@CarolEidt
Sorry for diverging a bit from the main thread subject, but I think we are fast approaching a situation where arguments in favor of improving the C# spec — and the generics implementation in particular — may outweigh the language team's reluctance to accept them. A second problem, which was really painful and resulted in bloated code, was writing reasonably abstracted tests for intrinsics. The lack of numeric generic constraints makes C# hard to work with even in the case of simple mathematics without any vectors.
Requests for improved generic constraints besides higher level generics are one of the most often proposed in csharplang repo. IMO it could be a good moment to place a formal language proposal and discuss it again, I can even write Roslyn prototype what in my experience should be relatively easy, to show the estimate of proposed implementation cost. If one of the goals of dotnet is to provide high performance high level language than having improved generics is a must.
It should be mentioned that the intention is that Arm64 AndNot implements left & ~right.
This may be inconsistent with X86 which in the SIMD implementation was choosing ~left & right.
If we choose to use the same name. They need to be consistent.
I personally prefer the proposed Arm64 order.
Similar issue probably exists with OrNot
If we choose to use the same name. They need to be consistent.
I am not sure this is the case. If we had a software fallback that wrapped both, then yes. However, these are hardware intrinsics, and represent specific hardware instructions.
On x86, the documentation for andnps specifies the algorithm is:
DEST[31:0] = (NOT(SRC1[31:0])) BITWISE AND SRC2[31:0]
DEST[63:32] = (NOT(SRC1[63:32])) BITWISE AND SRC2[63:32]
DEST[95:64] = (NOT(SRC1[95:64])) BITWISE AND SRC2[95:64]
DEST[127:96] = (NOT(SRC1[127:96])) BITWISE AND SRC2[127:96]
On ARM, it looks like you use BIC which the documentation specifies is:
bits(datasize) operand1 = X[n];
bits(datasize) operand2 = ShiftReg(m, shift_type, shift_amount);
operand2 = NOT(operand2);
result = operand1 AND operand2;
X[d] = result;
This is a fundamental difference in the architectures but I don't think it is something we should be modifying the intrinsics for.
I find it quite unfortunate that the Intel instruction is called andnps, i.e. "AndNot", but it's really "NotAnd". That said, I tend to agree with @tannergooding that we probably want to keep closer consistency with the target, however in this case I think it would be good to use a different name.
Should the x86 name be updated to NotAnd, to better match what it is doing?
CC. @fiigii.
Should the x86 name be updated to NotAnd, to better match what it is doing?
I would be in favor of that, assuming @fiigii is OK with it.
I think, either way (fix it or leave it), it will cause some confusion to someone.
People used to working with x86 intrinsics will look for AndNot and will generally understand its behavior.
People used to working on ARM may expect the behavior ARM has, so there will be some context switch no matter what (either looking for the new name or understanding the architecture differences).
I think, either way (fix it or leave it), it will cause some confusion to someone.
People used to working with x86 intrinsics will look for AndNot and will generally understand its behavior
I find it quite unfortunate that the Intel instruction is called andnps, i.e. "AndNot", but it's really "NotAnd".
Agree these two statements both...
AndNot is good for helping people migrate from x86 C++ experience to .NET Core.
NotAnd is good for implementing an algorithm for both x86 and ARM in .NET Core.
Both are important, so no preference...
IMO there is quite a bit x86 asmcode written both in assembly and with intrinsics and perhaps due to that reason it is better to keep names as they are.
If we clearly point differences between both in documentation and in particular in intellisense via xml docs this should not be a problem for coders. Besides anyone who worked with both arm and x86 intrinsics or assembly is aware of underlying implementation surprises.
@eerhardt perhaps this would be a good time for a corefx person to chime in with their API perspective?
AndNotPrefix would be an option for the X86 prefixed not case.
I don't think there is a 100% right answer here. But I'll give my opinion.
We will have to have some sort of mapping between the C# API and the underlying hardware instruction, right? The way I've been trying out these APIs is to look them up on https://software.intel.com/sites/landingpage/IntrinsicsGuide/, finding the API I'm interested in, and reading about it there.
Or do we imagine that users would be successful just using the C# API and our docs?
If the developer is going to be constantly mapping between the C# API and the underlying intrinsic, it might make sense to keep the C# API close to the underlying intrinsic.
A goal of this feature is to expose the underlying architecture instructions (both their positives and negatives) to C#, right? It's not really a goal to try to make the x86 intrinsics look/act/etc like the ARM intrinsics.
But a typical use case for these methods is something like the following:
C#
if (Sse.IsSupported)
{
Sse.AndNot(left, right);
}
else if (Simd.IsSupported)
{
// some comment about why Simd.AndNot switches the operands from above
Simd.AndNot(right, left);
}
else
{
// software falback
}
It would seem that developers would always need to write that comment about why Simd.AndNot's parameters are switched from Sse.AndNot. So I think giving these functions different names probably makes sense.
If the developer is going to be constantly mapping between the C# API and the underlying intrinsic
It would be really nice if this was not the case.
The only open issue here is distinguishing between the different meanings of AndNot and OrNot
@eerhardt 's Current recommendation is for AndNot / OrNot to have different names.
Proposals for renaming:
NotAnd or AndNotPre for ~A & B; AndNot or AndNotPost for A & ~B. Similar naming would be used for ~A | B / A | ~B, ~A ^ B / A ^ ~B, ...
I think the naming convention which AndNotPre would allow intellisense to help quickly find the intrinsic w/o making mapping complicated.
Let's bring the naming issue up in the review meeting.
@eerhardt Any reason why this is marked future? The CoreCLR work is complete and ready for 2.1.
Looks good as proposed. Feedback:
Simd should be named AdvSimd0 so it's clear what the name refers to (ARM seems to use bit patterns to indicate what is supported, so it's not a named spec).
@terrajobst I think we refined the class name to A64Simd0 in later discussion.
Do we recall why this was changed from AdvSimd0 to A64Simd0?
Ah, I found the related reasoning. https://github.com/dotnet/corefx/issues/26574#issuecomment-418582589
ARM should live in System.Runtime.Intrinsics.Arm, i.e we should merge the Arm namespace with Arm64. We should resolve type conflicts between 32 and 64 by prefixes.
This means we need to differentiate between ARM32 AdvSimd and ARM64 AdvSimd. This would be applicable even if we had them in separate namespaces (i.e. S.R.I.Arm64.AdvSimd0 and S.R.I.Arm32.AdvSimd0) as users will be likely to import both.
This would be partially resolved by the discussion that was had here: https://github.com/dotnet/coreclr/pull/23622#discussion_r270974636. This was trying to determine if we could/should do something similar to x86/x64 and have AdvSimd0 and AdvSimd0.Arm64 since the Arm64 instructions are basically an extension of the Arm32 ones (even if they are considered different architectures).
CC. @TamarChristinaArm, @CarolEidt
@TamarChristinaArm @tannergooding I will work on implementing intrinsics in this PR if no one currently does.
Cool, thanks. There's no conflict with me — I'm still finishing off my other list :)