Hello everyone,
I am trying to compare performance between single-threaded code, multi-threaded code and compute shaders, and something puzzles me.
With compute shaders the execution time varies a lot but I could not understand why. Here is the result of 4 different executions.
The first column is the time in milliseconds to create the array, the second is the time on the main thread, the third is the time for multi-threading and the last is the compute shader.
As you can see, the time for the last one varies a lot compared to the rest. It can also perform better than multi-threading and sometimes worse.
It seems that this is not very consistent.
Here is the C# code:
/// <summary>
/// Benchmark component that fills a float array with Perlin noise three ways
/// (main thread, <c>Parallel.For</c>, compute shader) and logs how long each took.
/// </summary>
public class ArrayGenerator : MonoBehaviour
{
    [SerializeField] int arraySize = 67107840;

    // Row width used to turn a linear index into (x, y) noise coordinates.
    int resolution;

    // Destination array shared by all three implementations.
    float[] array;

    // Loaded on the first GPU() call and reused afterwards.
    ComputeShader computeShader;

    /// <summary>
    /// Allocates the array, runs the three implementations in sequence and logs
    /// the elapsed time of each step in milliseconds.
    /// </summary>
    [ContextMenu("Update Array")]
    void UpdateArray()
    {
        double arrayCreation = CalculateOperationMilliseconds(() =>
        {
            array = new float[arraySize];
        });

        resolution = (int)Mathf.Sqrt(arraySize) + 1;

        double mainThread = CalculateOperationMilliseconds(CPU_MainThread);
        double multiThreads = CalculateOperationMilliseconds(CPU_MultiThreads);
        double gpu = CalculateOperationMilliseconds(GPU);

        // The values are Stopwatch milliseconds, not ticks.
        Debug.Log($"Elapsed ms (array creation / main thread / multi-threads / GPU): {arrayCreation} / {mainThread} / {multiThreads} / {gpu}");
    }

    /// <summary>Fills the array sequentially on the calling thread.</summary>
    void CPU_MainThread()
    {
        for (int i = 0; i < arraySize; i++)
        {
            int x = i % resolution;
            int y = i / resolution;
            array[i] = Mathf.PerlinNoise(x, y);
        }
    }

    /// <summary>Fills the array using <c>Parallel.For</c> over every index.</summary>
    void CPU_MultiThreads()
    {
        Parallel.For(0, arraySize, i =>
        {
            int x = i % resolution;
            int y = i / resolution;
            array[i] = Mathf.PerlinNoise(x, y);
        });
    }

    /// <summary>
    /// Fills the array through the "ArrayOperation" compute shader.
    /// </summary>
    /// <remarks>
    /// Everything in this method is inside the timed region: the one-off shader
    /// load on the first call, the kernel lookup, the buffer allocation, the
    /// dispatch, the read-back into <c>array</c> and the buffer release. The
    /// reported GPU time is therefore not just kernel execution time.
    /// </remarks>
    void GPU()
    {
        if (computeShader == null)
        {
            computeShader = ComputeHelper.LoadComputeShader("ArrayOperation");
        }

        int kernelIndex = computeShader.FindKernel("CSMain");
        ComputeBuffer arrayBuffer = ComputeHelper.CreateBuffer<float>(arraySize);
        try
        {
            computeShader.SetBuffer(kernelIndex, "array", arrayBuffer);
            computeShader.SetInt("arraySize", arraySize);
            computeShader.SetInt("resolution", resolution);
            ComputeHelper.Dispatch(computeShader, kernelIndex, arraySize, 1, 1);
            arrayBuffer.GetData(array);
        }
        finally
        {
            // Release the GPU buffer even if dispatch or read-back throws.
            arrayBuffer.Release();
        }
    }

    /// <summary>
    /// Runs <paramref name="operation"/> once and measures its wall-clock duration.
    /// </summary>
    /// <param name="operation">The action to time.</param>
    /// <returns>Elapsed time in milliseconds.</returns>
    public static double CalculateOperationMilliseconds(Action operation)
    {
        Stopwatch watch = Stopwatch.StartNew();
        operation.Invoke();
        watch.Stop();
        return watch.Elapsed.TotalMilliseconds;
    }
}
Here is the compute shader:
// Declares CSMain as a compute kernel entry point.
#pragma kernel CSMain
// Output buffer; CSMain writes one noise value per element.
RWStructuredBuffer<float> array;
// Number of elements to fill; CSMain skips threads beyond this count.
uint arraySize;
// Row width used to convert between a linear index and (x, y) coordinates.
uint resolution;
// Flattens a thread id into a linear buffer index: rows of `resolution` elements.
uint GetIndex(uint3 id)
{
    uint rowStart = id.y * resolution;
    return id.x + rowStart;
}
// Derives a normalized 2D direction from the lattice point p by hashing its coordinates.
float2 unity_gradientNoise_dir(float2 p)
{
    // Wrap the coordinates before hashing.
    float2 wrapped = p % 289;

    // Two rounds of the (34 * v + 1) * v mod 289 permutation, mixing in y after the first.
    float h = ((34 * wrapped.x + 1) * wrapped.x) % 289 + wrapped.y;
    h = ((34 * h + 1) * h) % 289;

    // Map the hash into [-1, 1).
    h = frac(h / 41) * 2 - 1;

    float2 dir = float2(h - floor(h + 0.5), abs(h) - 0.5);
    return normalize(dir);
}
// Gradient noise at p: blends the four corner-gradient contributions of the
// enclosing unit cell using a quintic fade curve.
float unity_gradientNoise(float2 p)
{
    float2 cell = floor(p);   // integer corner of the cell containing p
    float2 local = frac(p);   // position of p inside that cell

    // Dot product of each corner's gradient with the offset from that corner.
    float d00 = dot(unity_gradientNoise_dir(cell), local);
    float d01 = dot(unity_gradientNoise_dir(cell + float2(0, 1)), local - float2(0, 1));
    float d10 = dot(unity_gradientNoise_dir(cell + float2(1, 0)), local - float2(1, 0));
    float d11 = dot(unity_gradientNoise_dir(cell + float2(1, 1)), local - float2(1, 1));

    // Quintic fade: 6t^5 - 15t^4 + 10t^3.
    float2 fade = local * local * local * (local * (local * 6 - 15) + 10);

    float alongX0 = lerp(d00, d01, fade.y);
    float alongX1 = lerp(d10, d11, fade.y);
    return lerp(alongX0, alongX1, fade.x);
}
// Gradient noise at UV, shifted upward by 0.5.
float PerlinNoise(float2 UV)
{
    float noise = unity_gradientNoise(UV);
    return noise + 0.5;
}
// Kernel entry point: each thread writes the noise value for one buffer element.
// The bounds check and the (x, y) coordinates are all derived from the same
// linear index that is written, so they stay consistent when id.y is non-zero.
// For a dispatch where id.y is always 0 this is identical to using id.x.
[numthreads(1024, 1, 1)]
void CSMain(uint3 id : SV_DispatchThreadID)
{
    uint index = GetIndex(id);

    // Threads past the end of the buffer have nothing to write.
    if (index >= arraySize)
        return;

    uint x = index % resolution;
    uint y = index / resolution;

    // NOTE(review): x and y are whole numbers, so frac() of the coordinates is 0
    // inside unity_gradientNoise; the result is presumably a constant 0.5 for
    // every element — confirm whether fractional coordinates were intended.
    array[index] = PerlinNoise(float2(x, y));
}