Thanks so much for the explanation, Bgolus!
That’s really interesting. I had no idea that GPUs worked like that. I only started learning how to write shaders last month, and I still have much to learn.
I haven’t looked into rendering to a render texture and then sampling from it. So far I’ve focused only on processes that are part of Unity’s built-in pipeline, and with my basic knowledge I couldn’t see a way to make just this one shader render to a separate target.
From my cursory research and limited knowledge, making a single object shader output to a render texture requires either:
- A dedicated additional camera in the scene whose Target Texture is set to a render texture asset.
- Using a setup script to assign multiple render targets to a single camera.
- Calling Graphics.Blit, which I’m not sure how to use yet, but which can fill a render texture with the output of a material.
In each of these cases I’d also need to set up a second shader beforehand that reads from the render texture during its pass.
So if I’m understanding you correctly, if I take any of these routes and simply use a lower-resolution render texture, the GPU will use fewer cycles because it’s shading a smaller number of pixels in total. That’s very handy.
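Just to check my understanding of the Blit route, here’s a rough sketch of what I imagine it would look like. I haven’t tested it, and names like LowResCloudBlit, _cloudMaterial, _compositeMaterial, and _CloudTex are placeholders I made up, not anything from my actual project:

using UnityEngine;

// Rough, untested sketch: evaluate the expensive cloud material into a
// quarter-resolution render texture, then composite it at full resolution.
// All names here are placeholders.
[RequireComponent(typeof(Camera))]
public class LowResCloudBlit : MonoBehaviour
{
    public Material _cloudMaterial;      // material using the cloud shader
    public Material _compositeMaterial;  // cheap shader that blends the clouds over the scene
    RenderTexture _lowResRT;

    void OnEnable()
    {
        // Quarter resolution in each dimension = 1/16th of the fragments shaded.
        _lowResRT = new RenderTexture(Screen.width / 4, Screen.height / 4, 0,
                                      RenderTextureFormat.ARGBHalf);
        _lowResRT.filterMode = FilterMode.Bilinear; // free interpolation when upscaling
    }

    void OnDisable()
    {
        if (_lowResRT != null) _lowResRT.Release();
    }

    void OnRenderImage(RenderTexture source, RenderTexture destination)
    {
        // Run the cloud shader over the small target...
        Graphics.Blit(source, _lowResRT, _cloudMaterial);

        // ...then blend it over the full-resolution frame with a cheap pass.
        _compositeMaterial.SetTexture("_CloudTex", _lowResRT);
        Graphics.Blit(source, destination, _compositeMaterial);
    }
}

The idea (if I have it right) being that the expensive shader only ever shades the small render texture, and only a cheap composite pass runs at full resolution.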
However, if I want to implement that, how would I go about creating the render texture in a way that still lets me use Unity’s depth texture? The clouds work by raymarching: sampling points starting at the near side of the cloud volume and continuing until the ray exits the back side or reaches the scene depth at that pixel (roughly speaking).
If I call Blit in OnPreRender() I don’t think I have access to the depth texture for that frame yet, and if I call it in OnPostRender() the result is drawn on top of everything that doesn’t write to the depth buffer.
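From skimming the docs, it also looks like a command buffer might sidestep that timing problem, since you can ask the camera to run a blit at a specific point in its rendering, e.g. after the depth texture exists but before transparent objects are drawn. Again this is only my untested guess, reusing the same placeholder names as above:

using UnityEngine;
using UnityEngine.Rendering;

// Untested sketch: schedule the low-res cloud blit at a fixed point in the
// camera's render, after the depth texture is available but before the
// transparent queue. Placeholder names as before.
[RequireComponent(typeof(Camera))]
public class CloudCommandBuffer : MonoBehaviour
{
    public Material _cloudMaterial;
    RenderTexture _lowResRT;
    CommandBuffer _cmd;

    void OnEnable()
    {
        var cam = GetComponent<Camera>();
        cam.depthTextureMode |= DepthTextureMode.Depth; // make sure _CameraDepthTexture is generated

        _lowResRT = new RenderTexture(Screen.width / 4, Screen.height / 4, 0,
                                      RenderTextureFormat.ARGBHalf);

        _cmd = new CommandBuffer { name = "Low-res clouds" };
        _cmd.Blit(BuiltinRenderTextureType.None, _lowResRT, _cloudMaterial);

        // BeforeForwardAlpha: opaque geometry and the depth texture are finished,
        // but transparent objects haven't been drawn yet.
        cam.AddCommandBuffer(CameraEvent.BeforeForwardAlpha, _cmd);
    }

    void OnDisable()
    {
        GetComponent<Camera>().RemoveCommandBuffer(CameraEvent.BeforeForwardAlpha, _cmd);
        if (_lowResRT != null) _lowResRT.Release();
    }
}

I’d still need something like the composite step from the previous sketch to get the result back on screen, so I may well be missing something about how these pieces fit together.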
Let me share my latest test code, as well as the relevant bit from the cloud shader.
Test Code:
Shader "Unlit/BufferTest2"
{
Properties
{
}
SubShader
{
CGINCLUDE
#include "UnityCG.cginc"
RWStructuredBuffer<fixed4> _LowRezCloudBuffer;
int _BufferWidth;
int _Subsample;
int bufferIndex(int2 coordinates){
return coordinates.y * _BufferWidth + coordinates.x;
}
ENDCG
Tags { "RenderType"="Opaque" "Queue"="Geometry"}
LOD 100
Cull Back ZWrite On ZTest LEqual
Pass
{
CGPROGRAM
#pragma vertex vert
#pragma fragment frag
void vert (float4 vertex : POSITION, out float4 outpos : SV_POSITION)
{
outpos = UnityObjectToClipPos(vertex);
}
fixed4 frag (UNITY_VPOS_TYPE screenPos : VPOS) : SV_Target
{
int2 clipCoords = ((screenPos.xy + _Subsample * 0.5) % _Subsample) - _Subsample;
clip(clipCoords);
int index = bufferIndex(screenPos.xy/_Subsample);
fixed4 color = fixed4(screenPos.xy/_ScreenParams.xy, 0, 1);
_LowRezCloudBuffer[index] = color;
return color;
}
ENDCG
}
Tags { "RenderType"="Opaque" }
LOD 100
Cull Back ZWrite On ZTest LEqual
Pass
{
CGPROGRAM
#pragma vertex vert
#pragma fragment frag
fixed4 multisample(float2 screenPos){
int baseIndex = bufferIndex(screenPos.xy/_Subsample);
fixed4 bottomLeft = _LowRezCloudBuffer[baseIndex];
fixed4 bottomRight = _LowRezCloudBuffer[baseIndex + 1];
fixed4 topLeft = _LowRezCloudBuffer[baseIndex + _BufferWidth];
fixed4 topRight = _LowRezCloudBuffer[baseIndex + _BufferWidth + 1];
fixed4 bottom = lerp(bottomLeft, bottomRight, (screenPos.x % _Subsample)/_Subsample);
fixed4 top = lerp(topLeft, topRight, (screenPos.x % _Subsample)/_Subsample);
return lerp(bottom, top, (screenPos.y % _Subsample)/_Subsample);
}
void vert (float4 vertex : POSITION, out float4 outpos : SV_POSITION)
{
outpos = UnityObjectToClipPos(vertex);
}
fixed4 frag (UNITY_VPOS_TYPE screenPos : VPOS) : SV_Target
{
return multisample(screenPos.xy);
}
ENDCG
}
}
}
Here’s what the test shader looks like, storing one pixel out of every 3x3 block.
And here it is without the data readback phase, again storing one pixel out of every 3x3 block.
The cloud shader is very long, and most of the code is adapted straight from Sebastian Lague (all due credit!), so here is just the fragment shader:
fixed4 frag (v2f i, UNITY_VPOS_TYPE screenPos : VPOS) : SV_Target
{
    /* Only sample the cloud infrequently; the intermediate points will be interpolated. */
    int2 clipCoords = ((screenPos.xy + _Subsample * 0.5) % _Subsample) - _Subsample;
    clip(clipCoords * (_Subsample > 1 ? 1 : 0));

    /* Reconstruct the view-space ray for this pixel; only its length is used,
       to turn linear depth into distance along the ray. */
    float screenX = remap(screenPos.x / _ScreenParams.x, 0, 1, -1, 1);
    float screenY = remap(screenPos.y / _ScreenParams.y, 0, 1, -1, 1);
    float4 clipSpaceVector = float4(screenX, screenY, 1, 1);
    float4 viewSpaceVector = mul(unity_CameraInvProjection, clipSpaceVector);
    float distToFrag = length(viewSpaceVector.xyz);

    /* Precompute data needed for cloud marching. */
    float3 boundsMax = mul(unity_ObjectToWorld, FRONTBOUND).xyz;
    float3 boundsMin = mul(unity_ObjectToWorld, BACKBOUND).xyz;
    float depth = LinearEyeDepth(tex2Dproj(_CameraDepthTexture, UNITY_PROJ_COORD(i.screenPosition)).x) * distToFrag;
    float3 worldDirection = normalize(i.vectorToSurface);
    float2 boxDistances = rayBoxDist(boundsMin, boundsMax, _WorldSpaceCameraPos.xyz, 1 / worldDirection);
    float distInsideBox = boxDistances.y;
    float distToBox = max(boxDistances.x, 0);

    /* Skip fragments whose view ray is occluded before it even reaches the cloud volume. */
    clip(depth - distToBox);

    float blueOffset = tex2D(_BlueNoise, float2(screenX, screenY));

    /* This is the super expensive raymarch that the clipping above is supposed to save on. */
    float2 cloudput = cloudMarch(normalize(i.vectorToSurface), depth, boundsMax, boundsMin, blueOffset);
    float3 lightColoring = (_LightColor0.rgb * _LightColorInfluence) + 1 - _LightColorInfluence;
    fixed4 result = fixed4(cloudput.x * lightColoring * _Color.rgb, 1 - cloudput.y * _Color.a);

    /* Write the result into the low-res buffer; the second pass interpolates it back up. */
    int index = _Subsample > 1 ? bufferIndex(screenPos.xy / _Subsample) : bufferIndex(screenPos.xy);
    _LowRezCloudBuffer[index] = result;
    return 0;
}
So if it’s true that this approach doesn’t actually save any GPU work, how would I go about setting things up to use a render texture properly?