Hi there. I was experimenting with compute shaders. After a day of trial and error I got a grasp of how things work more or less.
Then I ran into an issue: with a heavy shader, the NVIDIA driver crashes. It seems to be entirely related to how long the shader takes to execute (around 1 second).
So my question is: should I file a bug report with NVIDIA, or is this normal? Should I split the computation into multiple passes?
The shader consists of a long dynamic loop (naive ray casting). Whether it is implemented as an SM5 shader or a compute shader, the same thing happens. I don’t have experience with GPGPU, but I’d expect the GPU to be able to handle it.
// Each #kernel tells which function to compile; you can have many kernels
#pragma kernel BakeAmbientOcclusion
// One mesh vertex as stored inside a Triangle element of the _Mesh buffer.
// Only `position` is read by the code in this file; `normal` and `uv` are
// carried along but unused here.
// NOTE(review): the field order/size presumably has to match the stride of the
// buffer uploaded by the host script (3 + 3 + 2 floats) - confirm against the caller.
struct Vertex {
float3 position;
float3 normal;
float2 uv;
};
// One triangle of the occluder mesh: three full vertices.
// This is the element type of the _Mesh structured buffer; RayCastBool reads
// v1/v2/v3.position as the triangle corners passed to intersect().
struct Triangle {
Vertex v1;
Vertex v2;
Vertex v3;
};
// ---- Kernel inputs / outputs -------------------------------------------------
// Direction supplied by the host. The visible kernel does not use it as the ray
// direction (rays are cast along the per-texel normal instead).
float3 _Direction;
// Number of triangles of _Mesh tested per ray (loop bound in RayCastBool).
int _TriCount;
// Not referenced by any code visible in this file.
int _Iterations;
// Triangle list every ray is tested against, indexed 0 .. _TriCount-1.
StructuredBuffer<Triangle> _Mesh;
// Per-texel value used as the ray direction; indexed by the dispatch thread id (xy).
Texture2D<float4> _Normals;
// Per-texel value used as the ray origin; indexed by the dispatch thread id (xy).
Texture2D<float4> _Positions;
// Output: occlusion is written to rgb and 1 to alpha, at the dispatch thread id (xy).
RWTexture2D<float4> _Result;
// Ray / triangle intersection: intersects the ray with the triangle's plane,
// then tests the hit point against the triangle using barycentric coordinates.
//
//   orig, dir  : ray origin and direction. The returned t is the parameter in
//                `orig + t * dir`, so it is measured in units of |dir|.
//   v0, v1, v2 : triangle corners.
//
// Returns t when the plane hit point lies inside the triangle (t may be
// negative if that point is behind the origin), or -1.0f on a miss. Callers
// are expected to treat any value <= 0 as "no hit".
//
// Rays parallel to the triangle plane and zero-area triangles are reported as
// misses. Without these guards the divisions below produce inf/NaN; every
// comparison against NaN is false, so an infinite t could fall through both
// rejection tests and be returned as a hit.
float intersect(float3 orig, float3 dir, float3 v0, float3 v1, float3 v2) {
    // Absolute threshold on dot(unit normal, dir); assumes dir is roughly unit length.
    const float kParallelEps = 1e-6f;

    float3 e1 = v1 - v0;
    float3 e2 = v2 - v0;

    // Barycentric denominator. By the Lagrange identity this equals
    // -|cross(e1, e2)|^2, so it is strictly negative for a real triangle.
    float uu = dot(e1, e1);
    float uv = dot(e1, e2);
    float vv = dot(e2, e2);
    float denom = uv * uv - uu * vv;
    if (denom >= 0.0f)
        return -1.0f; // degenerate (zero-area) triangle

    float3 normal = normalize(cross(e1, e2));
    float b = dot(normal, dir);
    if (abs(b) < kParallelEps)
        return -1.0f; // ray (almost) parallel to the triangle plane

    // Distance along the ray to the plane, and the corresponding point.
    float a = -dot(normal, orig - v0);
    float t = a / b;
    float3 p = orig + t * dir;

    // Express the hit point in the (e1, e2) basis.
    float3 w = p - v0;
    float wu = dot(w, e1);
    float wv = dot(w, e2);
    float invDenom = 1.0f / denom;

    float u = (uv * wv - vv * wu) * invDenom;
    if (u < 0.0f || u > 1.0f)
        return -1.0f;

    float v = (uv * wu - uu * wv) * invDenom;
    if (v < 0.0f || (u + v) > 1.0f)
        return -1.0f;

    return t;
}
// Any-hit query: walks the first _TriCount triangles of _Mesh in order and
// returns true as soon as intersect() reports a strictly positive ray
// parameter for one of them; returns false if no triangle does.
bool RayCastBool(float3 origin, float3 direction) {
    const int total = _TriCount;
    int index = 0;

    [loop]
    while (index < total) {
        Triangle current = _Mesh[index];
        float t = intersect(origin, direction,
                            current.v1.position,
                            current.v2.position,
                            current.v3.position);
        if (t > 0)
            return true;
        index++;
    }

    return false;
}
// Kernel entry point: one thread per texel of _Result, in 8x8 thread groups.
//
// For each texel, a ray is cast from the value stored in _Positions along the
// value stored in _Normals; the texel receives 1 in rgb if any triangle of
// _Mesh is hit and 0 otherwise (alpha is always 1).
//
// NOTE(review): the ray follows the texel normal, not _Direction. The previous
// version copied _Direction into a local that was never used; confirm which
// direction is intended before wiring _Direction in.
[numthreads(8,8,1)]
void BakeAmbientOcclusion(uint3 id : SV_DispatchThreadID) {
    // The dispatch is rounded up to whole 8x8 groups, so threads can land
    // outside the texture. Bail out early instead of running the full
    // triangle loop for texels that do not exist.
    uint width, height;
    _Result.GetDimensions(width, height);
    if (id.x >= width || id.y >= height)
        return;

    // Explicit .xyz: the textures are float4, only the first three channels are used.
    float3 fragNormal = _Normals[id.xy].xyz;
    float3 fragPosition = _Positions[id.xy].xyz;

    float occlusion = 0;
    const int RayCount = 1;
    for (int rayInd = 0; rayInd < RayCount; rayInd++) {
        // Each occluded ray contributes an equal share, so the sum stays in [0, 1].
        if (RayCastBool(fragPosition, fragNormal)) {
            occlusion += 1.0 / (float)RayCount;
        }
    }

    _Result[id.xy] = float4(occlusion.xxx, 1);
}
P.S. The compute shader docs are a bit lacking.
