Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SSE #19

Merged
merged 1 commit into from
Sep 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion Benchmarking/BenchmarkRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public class BenchmarkRunner
{
public static readonly string[] AvailableBenchmarks =
{
"ZIP", "GZIP", "BZIP2", "DEFLATE", "ARITHMETIC_INT", "ARITHMETIC_FLOAT", "AVX", "ALL", "COMPRESSION",
"ZIP", "GZIP", "BZIP2", "DEFLATE", "ARITHMETIC_INT", "ARITHMETIC_FLOAT", "AVX", "SSE", "ALL", "COMPRESSION",
"ARITHMETIC", "EXTENSION", "INT", "FLOAT"
};

Expand Down Expand Up @@ -91,6 +91,13 @@ public void RunBenchmark()
break;
}

case "SSE":
{
benchmarksToRun.Add(new SSE(options));

break;
}

case "COMPRESSION":
{
benchmarksToRun.Add(new ZIP(options));
Expand All @@ -112,6 +119,7 @@ public void RunBenchmark()
case "EXTENSION":
{
benchmarksToRun.Add(new AVX(options));
benchmarksToRun.Add(new SSE(options));

break;
}
Expand Down Expand Up @@ -139,6 +147,7 @@ public void RunBenchmark()
benchmarksToRun.Add(new Integer(options));
benchmarksToRun.Add(new Float(options));
benchmarksToRun.Add(new AVX(options));
benchmarksToRun.Add(new SSE(options));

break;
}
Expand Down
6 changes: 3 additions & 3 deletions Benchmarking/Extension/AVX.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public override void Run()

public override string GetDescription()
{
return "AVX benchmark by adding two vectors of 256 bit (4 floats) from a big vector of 1024 bit";
return "AVX benchmark adding vectors of 256 bit (8 floats) from a vector of 1024 floats";
}

public override void Initialize()
Expand Down Expand Up @@ -103,7 +103,7 @@ private unsafe void AssignScalarU(Span<float> scalar, Span<float> dst)
pDstCurrent += 8;
}

var scalarVector128 = Sse.LoadScalarVector128(pdst);
var scalarVector128 = Sse.LoadScalarVector128(psrc);

if (pDstCurrent + 4 <= pDstEnd)
{
Expand Down Expand Up @@ -140,7 +140,7 @@ private unsafe void AddScalarU(Span<float> scalar, Span<float> dst)
pDstCurrent += 8;
}

var scalarVector128 = Sse.LoadScalarVector128(pdst);
var scalarVector128 = Sse.LoadScalarVector128(psrc);

if (pDstCurrent + 4 <= pDstEnd)
{
Expand Down
146 changes: 146 additions & 0 deletions Benchmarking/Extension/SSE.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#region using

using System;
using System.Collections.Generic;
using System.Threading.Tasks;
#if NETCOREAPP3_0
using System.Runtime.Intrinsics.X86;

#endif

#endregion

namespace Benchmarking.Extension
{
/// <summary>
/// Benchmark that repeatedly adds a scalar to a float buffer using SSE (128 bit) intrinsics.
/// Every worker task operates on its own buffer.
/// </summary>
internal class SSE : Benchmark
{
    // One buffer per worker task (index i is only touched by task i).
    private List<float[]> datas;

    // The scalar that is added to every buffer element on each pass.
    private float randomFloatingNumber;

    /// <summary>
    /// Creates the benchmark and verifies that it can run on this runtime and hardware.
    /// </summary>
    /// <param name="options">Benchmark options, forwarded to the base class.</param>
    /// <exception cref="NotSupportedException">
    /// Thrown when the build does not target .NET Core 3.0 or the CPU lacks SSE support.
    /// </exception>
    public SSE(Options options) : base(options)
    {
#if NETCOREAPP3_0
        if (!Sse.IsSupported)
        {
            throw new NotSupportedException("Your hardware does not support SSE intrinsics!");
        }
#else
        throw new NotSupportedException("You need at least .NET Core 3 to use this benchmark!");
#endif
    }

    /// <summary>
    /// Starts <c>options.Threads</c> tasks, splits a fixed budget of 1,000,000,000 passes
    /// evenly between them and blocks until all tasks have finished.
    /// </summary>
    public override void Run()
    {
#if NETCOREAPP3_0
        var threads = new Task[options.Threads];

        for (var i = 0; i < options.Threads; i++)
        {
            // Copy the loop variable so each closure captures its own index.
            var i1 = i;
            threads[i] = Task.Run(() =>
            {
                var randomFloatingSpan = new Span<float>(new[] { randomFloatingNumber });
                var dst = new Span<float>(datas[i1]);

                var iterations = 1000000000 / options.Threads;

                for (var j = 0; j < iterations; j++)
                {
                    AddScalarU(randomFloatingSpan, dst);
                }

                BenchmarkRunner.ReportProgress();
            });
        }

        Task.WaitAll(threads);
#endif
    }

    /// <summary>Returns the human readable description of this benchmark.</summary>
    public override string GetDescription()
    {
        return "SSE benchmark by adding two vectors of 128 bit (4 floats) from a big vector of 512 floats";
    }

    /// <summary>
    /// Sets the scalar operand and allocates one zero-initialized buffer per worker task.
    /// </summary>
    public override void Initialize()
    {
        randomFloatingNumber = float.Epsilon;

        datas = new List<float[]>(options.Threads);

        for (var i = 0; i < options.Threads; i++)
        {
            // 512 floats = 128 full 128 bit vectors, so the packed loop covers the
            // whole buffer and the scalar tail loop has nothing left to do.
            datas.Add(new float[512]);
        }
    }

    /// <summary>
    /// Returns the reference value for the single-threaded or the multi-threaded run.
    /// </summary>
    public override double GetReferenceValue()
    {
        // NOTE(review): these constants were presumably measured with the earlier
        // implementation that performed only one packed operation per pass;
        // re-measure them now that the whole buffer is processed with packed SSE.
        if (options.Threads == 1)
        {
            return 131352.0d;
        }

        return 32379.0d;
    }

#if NETCOREAPP3_0
    /// <summary>
    /// Writes the first element of <paramref name="scalar"/> to every element of
    /// <paramref name="dst"/>, four floats at a time, with a scalar loop for the remainder.
    /// </summary>
    /// <param name="scalar">Span whose first element is the value to assign; must not be empty.</param>
    /// <param name="dst">Destination buffer that is overwritten in place.</param>
    private unsafe void AssignScalarU(Span<float> scalar, Span<float> dst)
    {
        fixed (float* pdst = dst)
        fixed (float* psrc = scalar)
        {
            var pDstEnd = pdst + dst.Length;
            var pDstCurrent = pdst;

            // LoadScalarVector128 fills only the lowest lane (the upper three are zero),
            // so replicate lane 0 into all four lanes before the packed stores.
            var scalarVector128 = Sse.LoadScalarVector128(psrc);
            scalarVector128 = Sse.Shuffle(scalarVector128, scalarVector128, 0x00);

            // Packed part: one unaligned 128 bit store per four floats.
            while (pDstCurrent + 4 <= pDstEnd)
            {
                Sse.Store(pDstCurrent, scalarVector128);

                pDstCurrent += 4;
            }

            // Remainder (fewer than four floats left): one element at a time.
            while (pDstCurrent < pDstEnd)
            {
                Sse.StoreScalar(pDstCurrent, scalarVector128);

                pDstCurrent++;
            }
        }
    }

    /// <summary>
    /// Adds the first element of <paramref name="scalar"/> to every element of
    /// <paramref name="dst"/>, four floats at a time, with a scalar loop for the remainder.
    /// </summary>
    /// <param name="scalar">Span whose first element is the value to add; must not be empty.</param>
    /// <param name="dst">Buffer that is updated in place.</param>
    private unsafe void AddScalarU(Span<float> scalar, Span<float> dst)
    {
        fixed (float* pdst = dst)
        fixed (float* psrc = scalar)
        {
            var pDstEnd = pdst + dst.Length;
            var pDstCurrent = pdst;

            // LoadScalarVector128 fills only the lowest lane (the upper three are zero),
            // so replicate lane 0 into all four lanes before the packed adds.
            var scalarVector128 = Sse.LoadScalarVector128(psrc);
            scalarVector128 = Sse.Shuffle(scalarVector128, scalarVector128, 0x00);

            // Packed part: load, add and store four floats per iteration.
            while (pDstCurrent + 4 <= pDstEnd)
            {
                var dstVector = Sse.LoadVector128(pDstCurrent);
                dstVector = Sse.Add(dstVector, scalarVector128);
                Sse.Store(pDstCurrent, dstVector);

                pDstCurrent += 4;
            }

            // Remainder (fewer than four floats left): one element at a time.
            while (pDstCurrent < pDstEnd)
            {
                var dstVector = Sse.LoadScalarVector128(pDstCurrent);
                dstVector = Sse.AddScalar(dstVector, scalarVector128);
                Sse.StoreScalar(pDstCurrent, dstVector);

                pDstCurrent++;
            }
        }
    }
#endif
}
}