Skip to content

Commit 4eb19aa

Browse files
Merge branch 'main' into js/accumulative-memory-limit
2 parents 4259ca7 + d3ca6eb commit 4eb19aa

22 files changed

Lines changed: 19147 additions & 784 deletions

src/ImageSharp/Advanced/ParallelExecutionSettings.cs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@ public readonly struct ParallelExecutionSettings
1818
/// <summary>
1919
/// Initializes a new instance of the <see cref="ParallelExecutionSettings"/> struct.
2020
/// </summary>
21-
/// <param name="maxDegreeOfParallelism">The value used for initializing <see cref="ParallelOptions.MaxDegreeOfParallelism"/> when using TPL.</param>
21+
/// <param name="maxDegreeOfParallelism">
22+
/// The value used for initializing <see cref="ParallelOptions.MaxDegreeOfParallelism"/> when using TPL.
23+
/// Set to <c>-1</c> to leave the degree of parallelism unbounded.
24+
/// </param>
2225
/// <param name="minimumPixelsProcessedPerTask">The value for <see cref="MinimumPixelsProcessedPerTask"/>.</param>
2326
/// <param name="memoryAllocator">The <see cref="MemoryAllocator"/>.</param>
2427
public ParallelExecutionSettings(
@@ -44,7 +47,10 @@ public ParallelExecutionSettings(
4447
/// <summary>
4548
/// Initializes a new instance of the <see cref="ParallelExecutionSettings"/> struct.
4649
/// </summary>
47-
/// <param name="maxDegreeOfParallelism">The value used for initializing <see cref="ParallelOptions.MaxDegreeOfParallelism"/> when using TPL.</param>
50+
/// <param name="maxDegreeOfParallelism">
51+
/// The value used for initializing <see cref="ParallelOptions.MaxDegreeOfParallelism"/> when using TPL.
52+
/// Set to <c>-1</c> to leave the degree of parallelism unbounded.
53+
/// </param>
4854
/// <param name="memoryAllocator">The <see cref="MemoryAllocator"/>.</param>
4955
public ParallelExecutionSettings(int maxDegreeOfParallelism, MemoryAllocator memoryAllocator)
5056
: this(maxDegreeOfParallelism, DefaultMinimumPixelsProcessedPerTask, memoryAllocator)
@@ -58,6 +64,7 @@ public ParallelExecutionSettings(int maxDegreeOfParallelism, MemoryAllocator mem
5864

5965
/// <summary>
6066
/// Gets the value used for initializing <see cref="ParallelOptions.MaxDegreeOfParallelism"/> when using TPL.
67+
/// A value of <c>-1</c> leaves the degree of parallelism unbounded.
6168
/// </summary>
6269
public int MaxDegreeOfParallelism { get; }
6370

src/ImageSharp/Advanced/ParallelRowIterator.cs

Lines changed: 74 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,14 @@ public static void IterateRows<T>(
4444
where T : struct, IRowOperation
4545
{
4646
ValidateRectangle(rectangle);
47+
ValidateSettings(parallelSettings);
4748

4849
int top = rectangle.Top;
4950
int bottom = rectangle.Bottom;
5051
int width = rectangle.Width;
5152
int height = rectangle.Height;
5253

53-
int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask);
54-
int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps);
54+
int numOfSteps = GetNumberOfSteps(width, height, parallelSettings);
5555

5656
// Avoid TPL overhead in this trivial case:
5757
if (numOfSteps == 1)
@@ -65,7 +65,7 @@ public static void IterateRows<T>(
6565
}
6666

6767
int verticalStep = DivideCeil(rectangle.Height, numOfSteps);
68-
ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps };
68+
ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps);
6969
RowOperationWrapper<T> wrappingOperation = new(top, bottom, verticalStep, in operation);
7070

7171
_ = Parallel.For(
@@ -109,14 +109,14 @@ public static void IterateRows<T, TBuffer>(
109109
where TBuffer : unmanaged
110110
{
111111
ValidateRectangle(rectangle);
112+
ValidateSettings(parallelSettings);
112113

113114
int top = rectangle.Top;
114115
int bottom = rectangle.Bottom;
115116
int width = rectangle.Width;
116117
int height = rectangle.Height;
117118

118-
int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask);
119-
int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps);
119+
int numOfSteps = GetNumberOfSteps(width, height, parallelSettings);
120120
MemoryAllocator allocator = parallelSettings.MemoryAllocator;
121121
int bufferLength = Unsafe.AsRef(in operation).GetRequiredBufferLength(rectangle);
122122

@@ -135,7 +135,7 @@ public static void IterateRows<T, TBuffer>(
135135
}
136136

137137
int verticalStep = DivideCeil(height, numOfSteps);
138-
ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps };
138+
ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps);
139139
RowOperationWrapper<T, TBuffer> wrappingOperation = new(top, bottom, verticalStep, bufferLength, allocator, in operation);
140140

141141
_ = Parallel.For(
@@ -174,14 +174,14 @@ public static void IterateRowIntervals<T>(
174174
where T : struct, IRowIntervalOperation
175175
{
176176
ValidateRectangle(rectangle);
177+
ValidateSettings(parallelSettings);
177178

178179
int top = rectangle.Top;
179180
int bottom = rectangle.Bottom;
180181
int width = rectangle.Width;
181182
int height = rectangle.Height;
182183

183-
int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask);
184-
int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps);
184+
int numOfSteps = GetNumberOfSteps(width, height, parallelSettings);
185185

186186
// Avoid TPL overhead in this trivial case:
187187
if (numOfSteps == 1)
@@ -192,7 +192,7 @@ public static void IterateRowIntervals<T>(
192192
}
193193

194194
int verticalStep = DivideCeil(rectangle.Height, numOfSteps);
195-
ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps };
195+
ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps);
196196
RowIntervalOperationWrapper<T> wrappingOperation = new(top, bottom, verticalStep, in operation);
197197

198198
_ = Parallel.For(
@@ -236,14 +236,14 @@ public static void IterateRowIntervals<T, TBuffer>(
236236
where TBuffer : unmanaged
237237
{
238238
ValidateRectangle(rectangle);
239+
ValidateSettings(parallelSettings);
239240

240241
int top = rectangle.Top;
241242
int bottom = rectangle.Bottom;
242243
int width = rectangle.Width;
243244
int height = rectangle.Height;
244245

245-
int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask);
246-
int numOfSteps = Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps);
246+
int numOfSteps = GetNumberOfSteps(width, height, parallelSettings);
247247
MemoryAllocator allocator = parallelSettings.MemoryAllocator;
248248
int bufferLength = Unsafe.AsRef(in operation).GetRequiredBufferLength(rectangle);
249249

@@ -259,7 +259,7 @@ public static void IterateRowIntervals<T, TBuffer>(
259259
}
260260

261261
int verticalStep = DivideCeil(height, numOfSteps);
262-
ParallelOptions parallelOptions = new() { MaxDegreeOfParallelism = numOfSteps };
262+
ParallelOptions parallelOptions = CreateParallelOptions(parallelSettings, numOfSteps);
263263
RowIntervalOperationWrapper<T, TBuffer> wrappingOperation = new(top, bottom, verticalStep, bufferLength, allocator, in operation);
264264

265265
_ = Parallel.For(
@@ -272,6 +272,37 @@ public static void IterateRowIntervals<T, TBuffer>(
272272
[MethodImpl(InliningOptions.ShortMethod)]
273273
private static int DivideCeil(long dividend, int divisor) => (int)Math.Min(1 + ((dividend - 1) / divisor), int.MaxValue);
274274

275+
/// <summary>
/// Builds the <see cref="ParallelOptions"/> used for the current row iteration.
/// </summary>
/// <param name="parallelSettings">The execution settings supplied by the caller.</param>
/// <param name="numOfSteps">The number of row partitions that will be executed.</param>
/// <returns>
/// A <see cref="ParallelOptions"/> whose <see cref="ParallelOptions.MaxDegreeOfParallelism"/> is <c>-1</c>
/// when the settings specify <c>-1</c>, and <paramref name="numOfSteps"/> otherwise.
/// </returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static ParallelOptions CreateParallelOptions(in ParallelExecutionSettings parallelSettings, int numOfSteps)
{
    // -1 is passed through untouched; any other setting is replaced by the partition count.
    int degreeOfParallelism = numOfSteps;
    if (parallelSettings.MaxDegreeOfParallelism == -1)
    {
        degreeOfParallelism = -1;
    }

    return new ParallelOptions { MaxDegreeOfParallelism = degreeOfParallelism };
}
284+
285+
/// <summary>
286+
/// Calculates the number of row partitions to execute for the given region.
287+
/// </summary>
288+
/// <param name="width">The width of the region.</param>
289+
/// <param name="height">The height of the region.</param>
290+
/// <param name="parallelSettings">The execution settings.</param>
291+
/// <returns>The number of row partitions to execute.</returns>
292+
[MethodImpl(InliningOptions.ShortMethod)]
293+
private static int GetNumberOfSteps(int width, int height, in ParallelExecutionSettings parallelSettings)
294+
{
295+
int maxSteps = DivideCeil(width * (long)height, parallelSettings.MinimumPixelsProcessedPerTask);
296+
297+
if (parallelSettings.MaxDegreeOfParallelism == -1)
298+
{
299+
// Row batching cannot produce more useful partitions than the number of rows available.
300+
return Math.Min(height, maxSteps);
301+
}
302+
303+
return Math.Min(parallelSettings.MaxDegreeOfParallelism, maxSteps);
304+
}
305+
275306
private static void ValidateRectangle(Rectangle rectangle)
276307
{
277308
Guard.MustBeGreaterThan(
@@ -284,4 +315,35 @@ private static void ValidateRectangle(Rectangle rectangle)
284315
0,
285316
$"{nameof(rectangle)}.{nameof(rectangle.Height)}");
286317
}
318+
319+
/// <summary>
/// Checks that the supplied <see cref="ParallelExecutionSettings"/> can be used for row iteration.
/// </summary>
/// <param name="parallelSettings">The execution settings to check.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <see cref="ParallelExecutionSettings.MaxDegreeOfParallelism"/> is <c>0</c> or less than <c>-1</c>,
/// or when <see cref="ParallelExecutionSettings.MinimumPixelsProcessedPerTask"/> is not greater than <c>0</c>.
/// </exception>
/// <exception cref="ArgumentNullException">
/// Thrown when <see cref="ParallelExecutionSettings.MemoryAllocator"/> is null.
/// This also covers the public <see cref="ParallelExecutionSettings"/> default value, which bypasses constructor validation.
/// </exception>
private static void ValidateSettings(in ParallelExecutionSettings parallelSettings)
{
    // ParallelExecutionSettings is a public struct, so a default instance can reach this point
    // without ever passing through constructor validation.
    int maxDegreeOfParallelism = parallelSettings.MaxDegreeOfParallelism;
    bool isUnbounded = maxDegreeOfParallelism == -1;

    // Valid values are -1 or any positive number.
    if (!isUnbounded && maxDegreeOfParallelism <= 0)
    {
        throw new ArgumentOutOfRangeException(
            $"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MaxDegreeOfParallelism)}");
    }

    Guard.MustBeGreaterThan(
        parallelSettings.MinimumPixelsProcessedPerTask,
        0,
        $"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MinimumPixelsProcessedPerTask)}");

    Guard.NotNull(
        parallelSettings.MemoryAllocator,
        $"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MemoryAllocator)}");
}
287349
}

src/ImageSharp/Common/Helpers/Numerics.cs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,20 @@ public static Vector256<float> UnPremultiply(Vector256<float> source, Vector256<
643643
return Avx.Blend(result, alpha, BlendAlphaControl);
644644
}
645645

646+
/// <summary>
/// Divides each component of <paramref name="source"/> by the matching component of <paramref name="alpha"/>,
/// keeping the source value where that alpha component is zero. Every fourth component of the result
/// is taken unchanged from <paramref name="alpha"/>.
/// </summary>
/// <param name="source">The vector to un-premultiply.</param>
/// <param name="alpha">The vector of alpha divisors.</param>
/// <returns>The <see cref="Vector512{T}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector512<float> UnPremultiply(Vector512<float> source, Vector512<float> alpha)
{
    // All bits set in every fourth lane (indices 3, 7, 11 and 15); those lanes take their value from alpha.
    Vector512<float> alphaLanes = Vector512.Create(0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1).AsSingle();

    // Lanes whose alpha equals zero keep the source value instead of the division result.
    Vector512<float> alphaIsZero = Vector512.Equals(alpha, Vector512<float>.Zero);
    Vector512<float> quotient = source / alpha;
    Vector512<float> unpremultiplied = Vector512.ConditionalSelect(alphaIsZero, source, quotient);

    // Restore the alpha lanes so the alpha component is left unchanged.
    return Vector512.ConditionalSelect(alphaLanes, alpha, unpremultiplied);
}
659+
646660
/// <summary>
647661
/// Permutes the given vector, returning a new instance with all the values set to <see cref="Vector4.W"/>.
648662
/// </summary>
@@ -690,7 +704,7 @@ public static Vector4 WithW(Vector4 value, Vector4 w)
690704
/// </summary>
691705
/// <param name="vectors">The span of vectors</param>
692706
[MethodImpl(MethodImplOptions.AggressiveInlining)]
693-
public static unsafe void CubePowOnXYZ(Span<Vector4> vectors)
707+
public static void CubePowOnXYZ(Span<Vector4> vectors)
694708
{
695709
ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors);
696710
ref Vector4 endRef = ref Unsafe.Add(ref baseRef, (uint)vectors.Length);

src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -601,51 +601,6 @@ private static void Shuffle4Slice3(
601601
}
602602
}
603603

604-
/// <summary>
605-
/// Performs a multiplication and an addition of the <see cref="Vector256{Single}"/>.
606-
/// TODO: Fix. The arguments are in a different order to the FMA intrinsic.
607-
/// </summary>
608-
/// <remarks>ret = (vm0 * vm1) + va</remarks>
609-
/// <param name="va">The vector to add to the intermediate result.</param>
610-
/// <param name="vm0">The first vector to multiply.</param>
611-
/// <param name="vm1">The second vector to multiply.</param>
612-
/// <returns>The <see cref="Vector256{T}"/>.</returns>
613-
[MethodImpl(InliningOptions.AlwaysInline)]
614-
public static Vector256<float> MultiplyAdd(
615-
Vector256<float> va,
616-
Vector256<float> vm0,
617-
Vector256<float> vm1)
618-
{
619-
if (Fma.IsSupported)
620-
{
621-
return Fma.MultiplyAdd(vm1, vm0, va);
622-
}
623-
624-
return va + (vm0 * vm1);
625-
}
626-
627-
/// <summary>
628-
/// Performs a multiplication and a negated addition of the <see cref="Vector256{Single}"/>.
629-
/// </summary>
630-
/// <remarks>ret = c - (a * b)</remarks>
631-
/// <param name="a">The first vector to multiply.</param>
632-
/// <param name="b">The second vector to multiply.</param>
633-
/// <param name="c">The vector to add negated to the intermediate result.</param>
634-
/// <returns>The <see cref="Vector256{T}"/>.</returns>
635-
[MethodImpl(InliningOptions.ShortMethod)]
636-
public static Vector256<float> MultiplyAddNegated(
637-
Vector256<float> a,
638-
Vector256<float> b,
639-
Vector256<float> c)
640-
{
641-
if (Fma.IsSupported)
642-
{
643-
return Fma.MultiplyAddNegated(a, b, c);
644-
}
645-
646-
return Avx.Subtract(c, Avx.Multiply(a, b));
647-
}
648-
649604
/// <summary>
650605
/// Blend packed 8-bit integers from <paramref name="left"/> and <paramref name="right"/> using <paramref name="mask"/>.
651606
/// The high bit of each corresponding <paramref name="mask"/> byte determines the selection.

src/ImageSharp/Common/Helpers/Vector256Utilities.cs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,28 @@ public static Vector256<float> MultiplyAdd(
115115
return va + (vm0 * vm1);
116116
}
117117

118+
/// <summary>
/// Multiplies two <see cref="Vector256{Single}"/> values and subtracts the product from a third.
/// </summary>
/// <remarks>ret = va - (vm0 * vm1)</remarks>
/// <param name="va">The vector the product is subtracted from.</param>
/// <param name="vm0">The first vector to multiply.</param>
/// <param name="vm1">The second vector to multiply.</param>
/// <returns>The <see cref="Vector256{T}"/>.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public static Vector256<float> MultiplyAddNegated(
    Vector256<float> va,
    Vector256<float> vm0,
    Vector256<float> vm1)
{
    if (!Fma.IsSupported)
    {
        // No FMA available: perform the multiply and the subtract as separate operations.
        return va - (vm0 * vm1);
    }

    // The intrinsic takes the two factors first and the addend last.
    return Fma.MultiplyAddNegated(vm0, vm1, va);
}
139+
118140
/// <summary>
119141
/// Performs a multiplication and a subtraction of the <see cref="Vector256{Single}"/>.
120142
/// </summary>

src/ImageSharp/Common/Helpers/Vector512Utilities.cs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,21 @@ public static Vector512<float> MultiplyAdd(
8787
Vector512<float> vm1)
8888
=> Avx512F.FusedMultiplyAdd(vm0, vm1, va);
8989

90+
/// <summary>
/// Performs a multiplication and a negated addition of the <see cref="Vector512{Single}"/>.
/// </summary>
/// <remarks>
/// ret = va - (vm0 * vm1).
/// Uses the fused AVX-512F instruction when <see cref="Avx512F.IsSupported"/> is <see langword="true"/>;
/// otherwise the multiply and subtract are performed as separate operations.
/// </remarks>
/// <param name="va">The vector to add to the negated intermediate result.</param>
/// <param name="vm0">The first vector to multiply.</param>
/// <param name="vm1">The second vector to multiply.</param>
/// <returns>The <see cref="Vector512{T}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector512<float> MultiplyAddNegated(
    Vector512<float> va,
    Vector512<float> vm0,
    Vector512<float> vm1)
{
    if (Avx512F.IsSupported)
    {
        // The intrinsic takes the two factors first and the addend last.
        return Avx512F.FusedMultiplyAddNegated(vm0, vm1, va);
    }

    // Without AVX-512F the intrinsic would throw PlatformNotSupportedException,
    // so fall back to the portable operators.
    return va - (vm0 * vm1);
}
104+
90105
/// <summary>
91106
/// Restricts a vector between a minimum and a maximum value.
92107
/// </summary>

src/ImageSharp/Configuration.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ public Configuration(params IImageFormatConfigurationModule[] configurationModul
6464
/// <summary>
6565
/// Gets or sets the maximum number of concurrent tasks enabled in ImageSharp algorithms
6666
/// configured with this <see cref="Configuration"/> instance.
67+
/// Set to <c>-1</c> to leave the degree of parallelism unbounded.
6768
/// Initialized with <see cref="Environment.ProcessorCount"/> by default.
6869
/// </summary>
6970
public int MaxDegreeOfParallelism

src/ImageSharp/Formats/Gif/GifDecoderCore.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ private bool ReadFrame<TPixel>(
468468
int length = this.currentLocalColorTableSize = this.imageDescriptor.LocalColorTableSize * 3;
469469
this.currentLocalColorTable ??= this.configuration.MemoryAllocator.Allocate<byte>(768, AllocationOptions.Clean);
470470
stream.Read(this.currentLocalColorTable.GetSpan()[..length]);
471-
rawColorTable = this.currentLocalColorTable!.GetSpan()[..length];
471+
rawColorTable = this.currentLocalColorTable.GetSpan()[..length];
472472
}
473473
else if (this.globalColorTable != null)
474474
{

0 commit comments

Comments
 (0)