Question about correctly transforming a normal to world space (non-uniform scaling)

In order to transform an object/local-space normal to world space, it must be handled differently from a point for non-uniform scaling to work correctly. I wrote some possible ways to transform an object/local-space normal to a world-space normal.

but I have some questions about the shader:
1. Why is
worldNormal = normalize(mul(transpose(_World2Object),v.normal));// wrong,
but
worldNormal = normalize(mul(transpose((float3x3)_World2Object),v.normal)); //right?

2. What is happening when we call UnityObjectToWorldNormal() from UnityCG.cginc?
Is it just a simplified version of return normalize(mul(v.normal,(float3x3)_World2Object));?

Shader "Test/CalculateWorldNormal"
{
    Properties
    {
        //enable select different compile shader
        [KeywordEnum(Wrong,Custom4x4Wrong,Custom3x3,Custom3x3Fast,Unity)] _Mode("world normal mode", Float) = 0
    }
    SubShader
    {
        Pass
        {
            CGPROGRAM
            #pragma vertex vert
            #pragma fragment frag
           
            #include "UnityCG.cginc" //for UnityObjectToWorldNormal() only

            //enable select different compile shader
            #pragma multi_compile _MODE_WRONG _MODE_CUSTOM4X4WRONG _MODE_CUSTOM3X3 _MODE_CUSTOM3X3FAST _MODE_UNITY

            // Per-vertex input fetched from the mesh by the vertex shader.
            struct appdata
            {
                float4 vertex : POSITION; // object-space vertex position
                float3 normal : NORMAL;   // object-space vertex normal (not guaranteed unit length after interpolation/scaling)
            };

            // Data interpolated from the vertex shader to the fragment shader.
            struct v2f
            {
                float4 color : COLOR;       // world normal encoded as a color (rgb = normal * 0.5 + 0.5)
                float4 vertex : SV_POSITION; // clip-space position
            };

            // Vertex shader: transforms the position to clip space and computes
            // the world-space normal using one of five compile-time variants
            // (selected via the _Mode keyword enum), encoding it as a color
            // for visual comparison of the methods.
            v2f vert (appdata v)
            {
                v2f o; 
                o.vertex = mul(UNITY_MATRIX_MVP, v.vertex);

                float3 worldNormal;

#if _MODE_WRONG
                //the way to transform a point can not apply to transforming a normal direction vector!
                //http://web.archive.org/web/20120228095346/http://www.arcsynthesis.org/gltut/Illumination/Tut09%20Normal%20Transformation.html
                //normal vector will be wrong if non-uniform scaling , but still correct if only uniform scaling
                worldNormal = normalize(mul(_Object2World, v.normal));
#elif _MODE_CUSTOM4X4WRONG
                //normal NOT correct (darker than correct): mul(float4x4, float3)
                //promotes the normal to a float4 with w = 1, so the matrix's
                //translation column leaks into the result.
                worldNormal = normalize(mul(transpose(_World2Object),v.normal));
#elif _MODE_CUSTOM3X3
                //same as _MODE_CUSTOM4X4WRONG, but casting to (float3x3) drops
                //the translation column, leaving only rotation/scale — correct.
                worldNormal = normalize(mul(transpose((float3x3)_World2Object), v.normal));
#elif _MODE_CUSTOM3X3FAST
                //result same as _MODE_CUSTOM3X3; swapping the mul() operand
                //order eliminates the explicit transpose().
                /*
                The following are algebraically equal (if not necessarily numerically equal):

                mul(M,v) == mul(v, transpose(M))
                mul(v,M) == mul(transpose(M), v)
                */
                worldNormal = normalize(mul(v.normal,(float3x3)_World2Object));
#elif _MODE_UNITY       
                //the best solution provided by unity, fastest.
                //copy from "UnityCG.cginc"
                /*
                // Transforms normal from object to world space
                inline float3 UnityObjectToWorldNormal( in float3 norm )
                {
                    // Multiply by transposed inverse matrix, actually using transpose() generates badly optimized code
                    return normalize(_World2Object[0].xyz * norm.x + _World2Object[1].xyz * norm.y + _World2Object[2].xyz * norm.z);
                }
                */
                worldNormal = UnityObjectToWorldNormal(v.normal);
#endif

                //show world normal as color, remapped from [-1,1] to [0,1].
                //FIX: the original assigned a float4 to o.color.rgb (a float3
                //swizzle), which is an invalid/truncating assignment, and left
                //o.color.a uninitialized. Assign the whole float4 instead.
                o.color = float4(worldNormal * 0.5 + 0.5, 0);
                return o;
            }
           
            // Fragment shader: outputs the interpolated debug color unchanged,
            // so the screen shows the encoded world-space normal directly.
            fixed4 frag (v2f i) : SV_Target
            {
                fixed4 debugColor = i.color;
                return debugColor;
            }
            ENDCG
        }
    }
}

_Object2World and _World2Object are 4x4 matrices containing rotation, scale, and translation. Technically, if you mul a float4x4 by a float3 the result should be an error, but shader compilers are funny and will probably expand that float3 normal to a float4 with a w of 1, causing it to pick up the translation.

Doing a (float3x3) cast forces it to treat the float3 normal properly; using float4(normal, 0) would also fix it.

And yes, what Unity is doing is the algebraic equivalent of mul(transpose((float3x3)_World2Object), normal) or mul(normal, (float3x3)_World2Object), but broken down, likely to prevent shader compilers from doing something funny with it (some mobile shader compilers, for example, are amazingly broken when doing mul).

1 Like