Wrong shadow map sampling depending on camera pose

Hi,

I wrote a custom vertex / fragment shader for the built-in render pipeline which includes shadow receiving. The shader generally works, but it turned out that with some camera poses, the shadow receiving (implemented with UNITY_SHADOW_ATTENUATION) breaks. When this happens, instead of proper shadow receiving, it looks like the cascaded shadow map atlas becomes visible on the mesh. This happens on Linux with OpenGL, with Unity 2021.3.15f1. Here are some images:

Correct shadow receiving (the quality isn’t great, but that is another issue):
8651145--1164123--Unity_shadows_correct.png

Wrong shadow receiving after moving the camera, with what looks like parts of the shadow map atlas highlighted:

So far, I had no success trying to fix this, so I wanted to ask whether anyone has seen this issue before and knows how to fix it.

For reference, the custom shader is embedded below. In principle, it follows the shadow receiving instructions from here: Unity - Manual: Custom shader fundamentals
I upgraded to the macros with the UNITY_ prefix as suggested in this forum post, but that did not fix the issue: Shadow problem of android platform [Vert Frag Shader]

Shader pass definition:

// ShaderLab definition of the custom shader. The pass shown here takes its
// vertex / fragment programs (vert / frag) from Builtin_forward.hlsl.
Shader "Shadername"
{
    Properties
    {
        // Ambient term; frag() uses it as the fraction of the color that remains in fully shadowed areas.
        _Ambient ("Ambient", Float) = 0.2
    }
   
    SubShader
    {
        Tags { "RenderType" = "Opaque" }
        LOD 100
       
        // The vertex shaders mirror the local x coordinates of the mesh to adapt the meshes,
        // which use a right-handed coordinate system, to Unity's left-handed coordinate system.
        // Because of that, the side for back-face culling must be flipped as well.
        Cull Front
       
        // (other passes ...)
        
        // NOTE(review): this pass declares no "LightMode" tag although it compiles the
        //               fwdbase variants below - confirm which light mode it is meant to run in.
        Pass
        {
            CGPROGRAM
            // Entry points, defined in Builtin_forward.hlsl
            #pragma vertex vert
            #pragma fragment frag
           
            // Variants for GPU instancing and fog
            #pragma multi_compile_instancing
            #pragma multi_compile_fog
           
            // Shadow receiving
            #pragma multi_compile_fwdbase nolightmap nodirlightmap nodynlightmap novertexlight
           
            #include "Builtin_forward.hlsl"
            ENDCG
        }
        
        // (other passes ...)
    }
}

Content of Builtin_forward.hlsl:
Shader code (long)

#include "UnityCG.cginc"
#include "AutoLight.cginc"

// Per-vertex input of vert(). All attributes are declared as unsigned integers
// and are converted to float inside the vertex shader.
struct appdata {
  // Note: Automatic conversion of position and texcoord to float at this point works with Metal, but fails with OpenGL.
  //       Thus, these attributes must be left as uintX here even though they get directly converted to floatX.
  uint4 position    : POSITION;   // quantized position; xyz is decoded by DecodePosition()
  uint2 texcoord    : TEXCOORD0;  // quantized texture coordinate; scaled by 1/65536 in vert()
  uint4 nodeIndices : TEXCOORD1;  // indices into deformationState, one per influencing node
  uint4 nodeWeights : TEXCOORD2;  // quantized blend weights, one per entry of nodeIndices
  UNITY_VERTEX_INPUT_INSTANCE_ID
};

// Helper struct that only carries a float4 "vertex" member.
// vert() declares a local of this type named "v" so that the shadow transfer macro
// can access the (already deformed) vertex position as "v.vertex".
struct shadowinput {
  float4 vertex;
};

// Data passed from vert() to frag().
struct v2f {
  float4 pos               : SV_POSITION;  // must be named "pos" for TRANSFER_SHADOW()
  centroid float2 texcoord : TEXCOORD0;    // texture coordinate, already scaled by 1/65536 in vert()
  float3 worldPos          : TEXCOORD1;    // passed to UNITY_SHADOW_ATTENUATION() in frag()
  UNITY_FOG_COORDS(2)        // put fog coordinates into TEXCOORD2 if enabled
  UNITY_SHADOW_COORDS(3)     // put shadows data into TEXCOORD3 if enabled;
                             // NOTE: As of the time of writing, Unity's documentation does not seem to mention UNITY_SHADOW_COORDS at all,
                             //       but according to https://catlikecoding.com/unity/tutorials/rendering/part-17/, it is a newer version of SHADOW_COORDS().
  UNITY_VERTEX_OUTPUT_STEREO
};

// Number of deformation nodes blended per vertex (loop bound in vert()).
#define K 4

// Per-node deformation data read by vert(). The declaration depends on the target API.
#if defined(SHADER_TARGET_GLSL) || defined(SHADER_API_GLCORE) || defined(SHADER_API_GLES) || defined(SHADER_API_GLES3)
  // OpenGL path: the data is fetched from a uint4 texture and reinterpreted as floats with asfloat() in vert().
  Texture2D_float<uint4> deformationState;  // Note: The _float suffix makes this a `highp` sampler in OpenGL ES, which is required for it to work on Android
  int deformationStateWidth;  // Note: We pass this in addition to `deformationState` itself since querying the size from the texture object did not work when compiled to GLSL
#else
  // Other APIs: flat float buffer, indexed with 12 floats per node in vert().
  StructuredBuffer<float> deformationState;
#endif

// Color planes read by SampleRGB(): luma is loaded at the given texel coordinate,
// both chroma planes at half that coordinate.
texture2D textureLuma;
texture2D textureChromaU;
texture2D textureChromaV;

// Offset (bboxMin.xyz) and per-axis scale (vertexFactor.xyz) used by DecodePosition().
float4 bboxMin;
float4 vertexFactor;
// .xy is multiplied with the interpolated texcoord in frag() to obtain texel coordinates.
float4 textureLumaSize;  // Note: Do not call this "textureSize", as this may be reserved in some shader languages

// Ambient term used in frag(); declared as a property in the shader definition.
float _Ambient;

// Converts a quantized vertex position to a float position:
// the integer xyz components are scaled per axis by vertexFactor.xyz
// and then offset by bboxMin.xyz. The w component of the input is ignored.
inline float3 DecodePosition(uint4 position) {
  const float3 quantized = float3(position.xyz);
  const float3 scaled = vertexFactor.xyz * quantized;
  return bboxMin.xyz + scaled;
}

// Vertex shader: de-quantizes the vertex attributes, blends the transforms of the
// K referenced deformation nodes into a deformed position, and fills in the
// clip-space position, world position, fog and shadow data for frag().
v2f vert(appdata vi) {
  v2f o;
  UNITY_SETUP_INSTANCE_ID(vi);
  UNITY_INITIALIZE_OUTPUT(v2f, o);
  UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(o);
 
  // Scale the integer texcoord by 1/65536 and offset it by half a step (0.5/65536).
  o.texcoord = float2(0.5 / 65536.0, 0.5 / 65536.0) + float2(1.0 / 65536.0, 1.0 / 65536.0) * float2(vi.texcoord);
 
  float weightsAsFloat[K];
 
  // De-quantize the weights to float
  // Special cases: 1 maps to 0.25/254 and 255 maps to 253.75/254.
  // Every other value w maps to (max(w, 1) - 1) / 254, so 0 yields a weight of 0.
  for (int k = 0; k < K; ++ k) {
    uint nodeWeightK = vi.nodeWeights[k];
    weightsAsFloat[k] =
        (nodeWeightK == 1) ? (0.5 * (0.5 / 254.)) :
        ((nodeWeightK == 255) ? (253.75 / 254.) :
          ((max(nodeWeightK, 1) - 1) / 254.));
  }
 
  // Re-normalize the weights
  // NOTE(review): if all four quantized weights are 0, the sum is 0 and this divides by zero -
  //               confirm that the mesh data rules this out.
  float weightFactor = 1. / (weightsAsFloat[0] + weightsAsFloat[1] + weightsAsFloat[2] + weightsAsFloat[3]);  // assumes K == 4
 
  for (int j = 0; j < K; ++ j) {
    weightsAsFloat[j] *= weightFactor;
  }
 
  // Compute the deformed vertex position
  // as the weighted sum over the K nodes of (3x3 matrix * originalPosition + translation).
  // Both code paths below read 12 values per node in the same order:
  // values 0-8 are the 3x3 matrix stored column by column, values 9-11 are the translation.
  float3 originalPosition = DecodePosition(vi.position);
  float3 deformedPosition = float3(0, 0, 0);
 
  for (int n = 0; n < K; ++ n) {
    #if defined(SHADER_TARGET_GLSL) || defined(SHADER_API_GLCORE) || defined(SHADER_API_GLES) || defined(SHADER_API_GLES3)
      // Texture path: each node occupies three consecutive uint4 texels (A, B, C),
      // addressed by a linear texel index that is split into (x, y) using the texture width.
      uint nodeIndex = vi.nodeIndices[n];
      uint deformationStateWidthUnsigned = deformationStateWidth;
     
      int texelIdx = 3 * nodeIndex;
      int texelFetchY = texelIdx / deformationStateWidthUnsigned;
      // x coordinate = texelIdx - texelFetchY * width, i.e. the remainder of the division above
      float4 deformationStateA = asfloat(deformationState.Load(int3(texelIdx - texelFetchY * deformationStateWidthUnsigned, texelFetchY, /*lod*/ 0)));
     
      ++ texelIdx;
      texelFetchY = texelIdx / deformationStateWidthUnsigned;
      float4 deformationStateB = asfloat(deformationState.Load(int3(texelIdx - texelFetchY * deformationStateWidthUnsigned, texelFetchY, /*lod*/ 0)));
     
      ++ texelIdx;
      texelFetchY = texelIdx / deformationStateWidthUnsigned;
      float4 deformationStateC = asfloat(deformationState.Load(int3(texelIdx - texelFetchY * deformationStateWidthUnsigned, texelFetchY, /*lod*/ 0)));
     
      deformedPosition +=
          weightsAsFloat[n] *
          (float3(deformationStateA[0] * originalPosition.x + deformationStateA[3] * originalPosition.y + deformationStateB[2] * originalPosition.z,
                  deformationStateA[1] * originalPosition.x + deformationStateB[0] * originalPosition.y + deformationStateB[3] * originalPosition.z,
                  deformationStateA[2] * originalPosition.x + deformationStateB[1] * originalPosition.y + deformationStateC[0] * originalPosition.z) +
          float3(deformationStateC[1], deformationStateC[2], deformationStateC[3]));
    #else
      // Buffer path: each node occupies 12 consecutive floats starting at baseIdx.
      uint baseIdx = 12 * vi.nodeIndices[n];
     
      deformedPosition +=
          weightsAsFloat[n] *
          (float3(deformationState[baseIdx + 0] * originalPosition.x + deformationState[baseIdx + 3] * originalPosition.y + deformationState[baseIdx + 6] * originalPosition.z,
                  deformationState[baseIdx + 1] * originalPosition.x + deformationState[baseIdx + 4] * originalPosition.y + deformationState[baseIdx + 7] * originalPosition.z,
                  deformationState[baseIdx + 2] * originalPosition.x + deformationState[baseIdx + 5] * originalPosition.y + deformationState[baseIdx + 8] * originalPosition.z) +
          float3(deformationState[baseIdx + 9], deformationState[baseIdx + 10], deformationState[baseIdx + 11]));
    #endif
  }
 
  // Mirror the x coordinate (see the handedness remark next to "Cull Front" in the shader definition).
  deformedPosition.x = -1 * deformedPosition.x;
 
  // NOTE(review): deformedPosition is a float3 here. If UNITY_MATRIX_M is a 4x4 matrix, confirm that
  //               the object's translation is applied as intended (e.g. by passing float4(deformedPosition, 1.0)),
  //               since worldPos is later handed to UNITY_SHADOW_ATTENUATION() in frag().
  o.worldPos = mul(UNITY_MATRIX_M, deformedPosition);
  o.pos = UnityObjectToClipPos(deformedPosition);
 
  // Compute fog amount from clip space position
  UNITY_TRANSFER_FOG(o, o.pos);
 
  // Note: For some platforms (e.g., Android), the TRANSFER_SHADOW() macro assumes that the vertex position can be accessed as "v.vertex".
  //       Since our input attribute is however encoded as uint4 (which is not suitable for direct use by the macro),
  //       we use a custom helper struct "shadowinput" here to provide the input for the macro instead.
  // Note: As of the time of writing, Unity still does not seem to have any documentation on UNITY_TRANSFER_SHADOW(),
  //       despite it apparently behaving better than the old TRANSFER_SHADOW().
  //       There is this forum post of a Unity employee:
  //       https://discussions.unity.com/t/748622/6
  shadowinput v;
  v.vertex = float4(deformedPosition, 1.0);
  UNITY_TRANSFER_SHADOW(o, vi.texcoord);  // vi.texcoord is used as dummy since we don't need lightmap coords
 
  return o;
}

// Loads one texel from the three color planes and converts it from YUV to RGB.
// The luma plane is read at `xy`, both chroma planes at `xy / 2`.
// Returns the clamped result as-is when UNITY_COLORSPACE_GAMMA is set,
// otherwise after passing it through GammaToLinearSpace().
half3 SampleRGB(uint2 xy) {
  const int3 lumaTexel = int3(xy, 0);
  const int3 chromaTexel = int3(xy / 2, 0);

  half y = textureLuma.Load(lumaTexel).x;
  half u = textureChromaU.Load(chromaTexel).x;
  half v = textureChromaV.Load(chromaTexel).x;

  // Remove the offsets of the stored values (16/255 for luma, 128/255 for chroma).
  y = y - 16. / 255.;
  u = u - 128. / 255.;
  v = v - 128. / 255.;

  // YUV -> RGB conversion, clamped to [0, 1].
  const half3 unclamped = half3(
      1.164f * y               + 1.596f * v,
      1.164f * y - 0.392f * u - 0.813f * v,
      1.164f * y + 2.017f * u             );
  const half3 srgb = clamp(unclamped, 0.f, 1.f);

  #if UNITY_COLORSPACE_GAMMA
    return srgb;
  #else
    return GammaToLinearSpace(srgb);
  #endif
}

// Fragment shader: samples the YUV texture with manual bilinear filtering,
// darkens the result according to the shadow attenuation and _Ambient, and applies fog.
fixed4 frag(v2f i) : SV_Target {
  // Simple bilinear interpolation (in linear RGB space, after converting the four input colors from YUV to linear RGB).
  UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(i);
 
  // Texel-space position, shifted by half a texel; the integer part selects the top-left texel
  // and the fractional part gives the interpolation weights.
  // NOTE(review): if xy becomes negative near the texture border, uint2(xy) and frac(xy) no longer
  //               refer to the same texel - confirm that the texcoords keep xy >= 0.
  float2 xy = textureLumaSize.xy * i.texcoord - float2(0.5, 0.5);
  uint2 baseXY = uint2(xy);
  float2 fract = frac(xy);
 
  // NOTE(review): baseXY + 1 is not clamped here, so the right / bottom neighbors may lie
  //               outside the texture for texels in the last column / row.
  const half3 topLeft = SampleRGB(baseXY);
  const half3 topRight = SampleRGB(uint2(baseXY.x + 1, baseXY.y));
  const half3 bottomLeft = SampleRGB(uint2(baseXY.x, baseXY.y + 1));
  const half3 bottomRight = SampleRGB(uint2(baseXY.x + 1, baseXY.y + 1));
 
  // Bilinear interpolation.
  const half topLeftWeight     = (1.0f - fract.x) * (1.0f - fract.y);
  const half topRightWeight    =         fract.x  * (1.0f - fract.y);
  const half bottomLeftWeight  = (1.0f - fract.x) *         fract.y;
  const half bottomRightWeight =         fract.x  *         fract.y;
  fixed4 col = fixed4(
      topLeftWeight * topLeft +
      topRightWeight * topRight +
      bottomLeftWeight * bottomLeft +
      bottomRightWeight * bottomRight,
      1.0f);
 
  // Compute shadow attenuation (1.0 = fully lit, 0.0 = fully shadowed).
  // Regarding UNITY_SHADOW_ATTENUATION(), see the comment on UNITY_TRANSFER_SHADOW() above.
  // Fully shadowed fragments keep the fraction _Ambient of their color, fully lit ones keep all of it.
  col.rgb *= _Ambient + (1.0 - _Ambient) * UNITY_SHADOW_ATTENUATION(i, i.worldPos);
 
  // Apply fog
  UNITY_APPLY_FOG(i.fogCoord, col);
 
  return col;
}

The relevant parts of this should be the following ones:

// --- Excerpt: helper struct and vertex-to-fragment struct ---
struct shadowinput {
  float4 vertex;
};

struct v2f {
  float4 pos               : SV_POSITION;  // must be named "pos" for TRANSFER_SHADOW()
  centroid float2 texcoord : TEXCOORD0;
  float3 worldPos          : TEXCOORD1;
  UNITY_FOG_COORDS(2)        // put fog coordinates into TEXCOORD2 if enabled
  UNITY_SHADOW_COORDS(3)     // put shadows data into TEXCOORD3 if enabled;
                             // NOTE: As of the time of writing, Unity's documentation does not seem to mention UNITY_SHADOW_COORDS at all,
                             //       but according to https://catlikecoding.com/unity/tutorials/rendering/part-17/, it is a newer version of SHADOW_COORDS().
  UNITY_VERTEX_OUTPUT_STEREO
};
  // --- Excerpt from the end of vert(): transferring the shadow data ---
  // Note: For some platforms (e.g., Android), the TRANSFER_SHADOW() macro assumes that the vertex position can be accessed as "v.vertex".
  //       Since our input attribute is however encoded as uint4 (which is not suitable for direct use by the macro),
  //       we use a custom helper struct "shadowinput" here to provide the input for the macro instead.
  // Note: As of the time of writing, Unity still does not seem to have any documentation on UNITY_TRANSFER_SHADOW(),
  //       despite it apparently behaving better than the old TRANSFER_SHADOW().
  //       There is this forum post of a Unity employee:
  //       https://discussions.unity.com/t/748622/6
  shadowinput v;
  v.vertex = float4(deformedPosition, 1.0);
  UNITY_TRANSFER_SHADOW(o, vi.texcoord);  // vi.texcoord is used as dummy since we don't need lightmap coords
  // --- Excerpt from frag(): applying the shadow attenuation ---
  // Compute shadow attenuation (1.0 = fully lit, 0.0 = fully shadowed).
  // Regarding UNITY_SHADOW_ATTENUATION(), see the comment on UNITY_TRANSFER_SHADOW() above.
  col.rgb *= _Ambient + (1.0 - _Ambient) * UNITY_SHADOW_ATTENUATION(i, i.worldPos);

To stress this again, the shader works fine most of the time, the shadow receiving just sometimes breaks depending on the camera pose. The mesh itself always looks fine.

Thanks in advance for any hints!

I think that I found the problem: The shader pass was missing the "LightMode" = "ForwardBase" tag. After adding that, shadow receiving seems to work consistently.

        Pass
        {
            // The tag that was missing from the pass definition shown in the question.
            Tags { "LightMode" = "ForwardBase" }
          
            // ...
        }