public static partial class MemoryExtensions
{
int IndexNotOf(this Span<T> span, T value) where T : IEquatable<T>;
int IndexNotOfAny(this Span<T> span, T value0, T value1) where T : IEquatable<T>;
int IndexNotOfAny(this Span<T> span, T value0, T value1, T value2) where T : IEquatable<T>;
int IndexNotOfAny(this Span<T> span, ReadOnlySpan<T> values) where T : IEquatable<T>;
int IndexNotOf(this ReadOnlySpan<T> span, T value) where T : IEquatable<T>;
int IndexNotOfAny(this ReadOnlySpan<T> span, T value0, T value1) where T : IEquatable<T>;
int IndexNotOfAny(this ReadOnlySpan<T> span, T value0, T value1, T value2) where T : IEquatable<T>;
int IndexNotOfAny(this ReadOnlySpan<T> span, ReadOnlySpan<T> values) where T : IEquatable<T>;
int LastIndexNotOf(this Span<T> span, T value) where T : IEquatable<T>;
int LastIndexNotOfAny(this Span<T> span, T value0, T value1) where T : IEquatable<T>;
int LastIndexNotOfAny(this Span<T> span, T value0, T value1, T value2) where T : IEquatable<T>;
int LastIndexNotOfAny(this Span<T> span, ReadOnlySpan<T> values) where T : IEquatable<T>;
int LastIndexNotOf(this ReadOnlySpan<T> span, T value) where T : IEquatable<T>;
int LastIndexNotOfAny(this ReadOnlySpan<T> span, T value0, T value1) where T : IEquatable<T>;
int LastIndexNotOfAny(this ReadOnlySpan<T> span, T value0, T value1, T value2) where T : IEquatable<T>;
int LastIndexNotOfAny(this ReadOnlySpan<T> span, ReadOnlySpan<T> values) where T : IEquatable<T>;
}
Example usage:
var firstNonSpace = span.IndexNotOf(' ');
if (firstNonSpace > 0)
{
return span.Slice(firstNonSpace);
}
return span;
As an alternative name - IndexOfAnyBut
Or IndexOfAnyExcept
Are these more commonly needed on Span than on String? Since we've got by without them on String since day 1 (which is not an argument against, but it's a datapoint)
Are these more commonly needed on Span than on String?
var firstNonSpace = text.AsSpan().IndexNotOf(' ');
if (firstNonSpace > 0)
{
return text.Substring(firstNonSpace, text.Length - firstNonSpace);
}
return text;
馃槈
string would also presumably require culture and ignorecase variants? Thought I'd start simple :)
What about Find which takes a predicate? That should cover this scenario?
What about Find which takes a predicate?
Can't vectorize that for primitive types
I feel like we should instead try to look into making some of the delegate scenarios vectorizable but I don't feel super strongly on not having this API (just feel like we will start adding up other APIs like: IndexOfValueInRange IndexOfValueNotInRange soon and that can quickly pollute the framework)
If we add this, we can take advantage of it from our Regex implementation. It now uses IndexOf and could easily use IndexNotOf should it be available. For example, building on @benaadams' example from earlier in the issue, if you had a regex "eat *spaces" that would find "eat" followed by any number of spaces followed "spaces", that would end up being represented in our IR as a multi ("eat") followed by a one loop (" *") followed by another multi ("spaces"), and we'd use the IndexNotOf to implement the one loop.
@danmosemsft Are these more commonly needed on Span than on String? Since we've got by without them on String since day 1 (which is not an argument against, but it's a datapoint)
For string would could re-implement the following apis with these span versions
partial class String
{
string Trim();
string Trim(char trimChar);
string Trim(params char[] trimChars);
string TrimEnd();
string TrimEnd(char trimChar);
string TrimEnd(params char[] trimChars);
string TrimStart();
string TrimStart(char trimChar);
string TrimStart(params char[] trimChars);
}
For string would could re-implement the following apis with these span versions
We could, but I'm skeptical we'd want to... in my experience the number of spaces being trimmed is usually few to none, which could make the overhead here be very pronounced. Also, the trim methods that don't take characters use IsWhitespace and would need to compare against every Unicode space character.
Would we want all the variety of overloads we have for IndexOf, etc? That would give us
c#
public static int IndexNotOf<T>(this ReadOnlySpan<T> span, T value) where T : IEquatable<T>;
public static int IndexNotOf<T>(this Span<T> span, T value) where T : IEquatable<T>;
public static int IndexNotOf<T>(this Span<T> span, ReadOnlySpan<T> value) where T : IEquatable<T>;
public static int IndexNotOf<T>(this ReadOnlySpan<T> span, ReadOnlySpan<T> value) where T : IEquatable<T>;
public static int IndexNotOf(this ReadOnlySpan<char> span, ReadOnlySpan<char> value, StringComparison comparisonType);
public static int IndexNotOfAny<T>(this Span<T> span, T value0, T value1) where T : IEquatable<T>;
public static int IndexNotOfAny<T>(this Span<T> span, T value0, T value1, T value2) where T : IEquatable<T>;
public static int IndexNotOfAny<T>(this ReadOnlySpan<T> span, T value0, T value1, T value2) where T : IEquatable<T>;
public static int IndexNotOfAny<T>(this ReadOnlySpan<T> span, T value0, T value1) where T : IEquatable<T>;
public static int IndexNotOfAny<T>(this ReadOnlySpan<T> span, ReadOnlySpan<T> values) where T : IEquatable<T>;
public static int IndexNotOfAny<T>(this Span<T> span, ReadOnlySpan<T> values) where T : IEquatable<T>;
public static int LastIndexNotOf<T>(this ReadOnlySpan<T> span, T value) where T : IEquatable<T>;
public static int LastIndexNotOf(this ReadOnlySpan<char> span, ReadOnlySpan<char> value, StringComparison comparisonType);
public static int LastIndexNotOf<T>(this Span<T> span, T value) where T : IEquatable<T>;
public static int LastIndexNotOf<T>(this Span<T> span, ReadOnlySpan<T> value) where T : IEquatable<T>;
public static int LastIndexNotOf<T>(this ReadOnlySpan<T> span, ReadOnlySpan<T> value) where T : IEquatable<T>;
public static int LastIndexNotOfAny<T>(this Span<T> span, T value0, T value1, T value2) where T : IEquatable<T>;
public static int LastIndexNotOfAny<T>(this Span<T> span, T value0, T value1) where T : IEquatable<T>;
public static int LastIndexNotOfAny<T>(this Span<T> span, ReadOnlySpan<T> values) where T : IEquatable<T>;
public static int LastIndexNotOfAny<T>(this ReadOnlySpan<T> span, T value0, T value1, T value2) where T : IEquatable<T>;
public static int LastIndexNotOfAny<T>(this ReadOnlySpan<T> span, T value0, T value1) where T : IEquatable<T>;
public static int LastIndexNotOfAny<T>(this ReadOnlySpan<T> span, ReadOnlySpan<T> values) where T : IEquatable<T>;
I'm implementing JsonPatch using System.Text.Json (the existing ASP.Net Core JsonPatch support is based on Newtonsoft) and this would be incredibly useful to parse array indexes as part of implementing JSON Pointer.
The JSON Pointer RFC defines a valid array index as:
array-index = %x30 / ( %x31-39 *(%x30-39) )
; "0", or digits without a leading "0"
So ideally, I'd love to use this as my validation for that:
var isValidArrayIndex =
span.SequenceEqual(Zero) ||
(span.IndexOfAny(OneThroughNine) == 0 &&
span.IndexNotOfAny(Digits) == -1);
Thanks for the info. Perhaps someone wants to share thoughts about whether it should be the full set of overloads or not. Then perhaps this is ready for API review.
Don't think the StringComparison ones are needed unless someone asks specifically; ordinal should be fine as normally looking for a special delimiter (spaces, slashes, brackets, etc)
IndexNotOf(this ReadOnlySpan<char> span, ReadOnlySpan<char> value, StringComparison comparisonType)
Are there good scenarios for LastIndexNotOfXXX if we wouldn't use them for TrimXXX ?
In the top proposal ther'es a mixture of Span and ReadOnlySpan maybe we could clean up which we need.
Span and ReadOnly span are both needed as they are extension methods; e.g. string only converts to ReadOnlySpan but array would convert to Span.
Don't have any strong feelings for LastIndexNotOf... dropped from summary
IndexNotOf<T>(this Span<T> span, ReadOnlySpan<T> value) where T : IEquatable<T> would be a bit strange as is a multichar delimiter
Another example where it could be used
https://github.com/dotnet/runtime/blob/effaf28d0ca74ab9ae11fc947f524393e545af0d/src/libraries/System.Private.CoreLib/src/System/IO/PathInternal.Windows.cs#L406-L411
return path.IndexNotOf(' ') < 0
@benaadams for single char/element scenario perhaps might be better to try to optimize Linq to do that fast (perhaps detect patterns similar to path.Any(c => c == ' ')).
It's a not pattern so would be
path.All(c => c == ' ')
However having Linq autovectorize from a IEnumerable and indirect Func would be a challenge?
public static bool All<char>(this IEnumerable<char> source, Func<char, bool> predicate)
Could change the predicate to be Expression<Func<T>> and then parse it (which would be a big task in itself); however the IEnumerable doesn't imply contiguous data for vectorization.
Span/ReadOnlySpan are also ref structs so can't convert to IEnumerable; so would need to add them explicitly; as which point its just changing namespace from MemoryExtensions where all the Span/ReadOnlySpan extensions currently are to Linq?
Some kind of Linq-style autovectorization for Span would be interesting via C# source generators where it converts the expressions to platform intrinsics e.g.
public static bool All<TSource>(this ReadOnlySpan<TSource> source, Expression<Func<TSource, bool>> predicate)
where TSource : unmanaged
but it's probably also a multi year research project?
Span and ReadOnly span are both needed as they are extension methods; e.g. string only converts to ReadOnlySpan but array would convert to Span.
Yeah, I was confused because you originally had ROS for LastIndexOfAny, and not IndexOfAny. 馃槃
It looks good to me -- @GrabYourPitchforks ?
Ben's original proposal (plus their _Last*_ counterparts) looks ready to go forward to review. We shouldn't provide StringComparison overloads since we're not comparing a sequence of elements against a sequence of elements. Instead, we're looking at each element in the source sequence in isolation without regard to any adjacent elements in the source sequence. By definition this means we're not providing a linguistic text search.
Steve already provided reasons for not rewriting string.Trim atop these newly-introduced APIs. I agree with his reasoning. I would even go further and suggest that we _not_ vectorize the implementations of these newly-introduced methods, as I believe the use cases for these methods (much like the use cases for Trim) will terminate very early and won't benefit from vectorization.
Added the Last ones back
suggest that we not vectorize the implementations of these newly-introduced methods,
@stephentoub when you mentioned taking advantage in Regex, were you hoping they'd be faster than the naive loop it currently does? Unfortunately the number of characters that the regex might scan could be anything from one to megabytes. For regex, a vectorized approach might be a better tradeoff across all patterns/texts.
FWIW the implementation nit I mentioned above doesn't have to be resolved before API review. We can always create the appropriate implementation as we become aware of various scenarios.
The only real requirement from an SDL perspective is that given this:
int idx = source.IndexNotOfAny(values);
The runtime complexity of this method must be bound by O(idx * values.Length). (Unless idx = -1, at which point the runtime complexity may be bound by O(source.Length * values.Length)). Vectorization vs. non-vectorization simply changes the constant factor, but it won't change the runtime complexity. IndexOfAny's current implementation follows the same constraint. The reason for this is that if you call it in a loop, you want _the loop's total complexity_ to be bound by O(source.Length * values.Length), not O(source.Length^2 * values.Length), which could allow algorithmic complexity attacks.
when you mentioned taking advantage in Regex, were you hoping they'd be faster than the naive loop it currently does?
There are two benefits for regex:
Just the first is a reasonable place to start. If that can be done, it's worth using in regex. Then if the implementation can be made even faster, yay, regex just gets better implicitly.
I do agree with Levi, though; I think it's likely that even in the regex scenarios we'd find in common cases the loops to have very short iteration counts. But it'd be worth looking at what the opportunities are for using the method and then looking at common patterns and inputs to see what we think the win would actually be.
Implementation details... ;)
Sounds like is ready to mark for review?
Sounds like is ready to mark for review?
Marked. :)
```C#
namespace System
{
public static partial class MemoryExtensions
{
int IndexNotOf(this Span
int IndexNotOfAny(this Span
int IndexNotOfAny(this Span
int IndexNotOfAny(this Span
int IndexNotOf(this ReadOnlySpan<T> span, T value) where T : IEquatable<T>;
int IndexNotOfAny(this ReadOnlySpan<T> span, T value0, T value1) where T : IEquatable<T>;
int IndexNotOfAny(this ReadOnlySpan<T> span, T value0, T value1, T value2) where T : IEquatable<T>;
int IndexNotOfAny(this ReadOnlySpan<T> span, ReadOnlySpan<T> values) where T : IEquatable<T>;
int LastIndexNotOf(this Span<T> span, T value) where T : IEquatable<T>;
int LastIndexNotOfAny(this Span<T> span, T value0, T value1) where T : IEquatable<T>;
int LastIndexNotOfAny(this Span<T> span, T value0, T value1, T value2) where T : IEquatable<T>;
int LastIndexNotOfAny(this Span<T> span, ReadOnlySpan<T> values) where T : IEquatable<T>;
int LastIndexNotOf(this ReadOnlySpan<T> span, T value) where T : IEquatable<T>;
int LastIndexNotOfAny(this ReadOnlySpan<T> span, T value0, T value1) where T : IEquatable<T>;
int LastIndexNotOfAny(this ReadOnlySpan<T> span, T value0, T value1, T value2) where T : IEquatable<T>;
int LastIndexNotOfAny(this ReadOnlySpan<T> span, ReadOnlySpan<T> values) where T : IEquatable<T>;
}
}
```
Most helpful comment
Or
IndexOfAnyExcept