Unity’s documentation states that Cg to GLSL cross-compilation is well optimized. Unfortunately, I’ve just experienced something that lead me to question that.
There is a built-in (I guess) collection of mobile shaders. I used one of it called Mobile/Diffuse. I use it for simple lightmapped geometry with diffuse map. The shader has very simple GLSL ES code:
#ifdef VERTEX
#define gl_ModelViewProjectionMatrix glstate_matrix_mvp
uniform mat4 glstate_matrix_mvp;
varying highp vec2 xlv_TEXCOORD1;
varying highp vec2 xlv_TEXCOORD0;
uniform highp vec4 unity_LightmapST;
uniform highp vec4 _MainTex_ST;
attribute vec4 _glesMultiTexCoord1;
attribute vec4 _glesMultiTexCoord0;
attribute vec4 _glesVertex;
void main ()
{
gl_Position = (gl_ModelViewProjectionMatrix * _glesVertex);
xlv_TEXCOORD0 = ((_glesMultiTexCoord0.xy * _MainTex_ST.xy) + _MainTex_ST.zw);
xlv_TEXCOORD1 = ((_glesMultiTexCoord1.xy * unity_LightmapST.xy) + unity_LightmapST.zw);
}
#endif
#ifdef FRAGMENT
varying highp vec2 xlv_TEXCOORD1;
varying highp vec2 xlv_TEXCOORD0;
uniform sampler2D unity_Lightmap;
uniform sampler2D _MainTex;
void main ()
{
lowp vec4 c;
lowp vec4 tmpvar_1;
tmpvar_1 = texture2D (_MainTex, xlv_TEXCOORD0);
c = vec4(0.0, 0.0, 0.0, 0.0);
c.xyz = (tmpvar_1.xyz * (2.0 * texture2D (unity_Lightmap, xlv_TEXCOORD1).xyz));
c.w = tmpvar_1.w;
gl_FragData[0] = c;
}
#endif"
Nothing peculiar here - sampling a texture, a lightmap, and mul of the two.
To have more flexibility I decided to write my own shader similar to that one, that just samples the diffuse and lightmap and combines them. Here’s my shader:
Shader "Custom/Enviro/Solid/Lightmap"
{
Properties
{
_MainTex ("Base (RGB) Trans (A)", 2D) = "white" {}
_LightmapTex ("Lightmap", 2D) = "white" {}
}
SubShader
{
Tags { "Queue" = "Geometry+1" }
CGPROGRAM
#pragma surface surf Custom vertex:vert nolightmap nodirlightmap noambient novertexlights
sampler2D _MainTex;
sampler2D _LightmapTex;
struct CustomSurfaceOutput
{
half3 Albedo;
half3 Normal;
half3 Emission;
half Specular;
half Alpha;
half3 Lightmap;
};
half4 LightingCustom (CustomSurfaceOutput s, half3 lightDir, half atten)
{
half4 c;
c.rgb = s.Albedo * s.Lightmap;
c.a = s.Alpha;
return c;
}
struct Input
{
float2 texCoord;
float2 lightmapTexCoord;
};
void vert(inout appdata_full v, out Input o)
{
o.texCoord = v.texcoord.xy;
o.lightmapTexCoord = v.texcoord1.xy;
}
void surf(Input IN, inout CustomSurfaceOutput o)
{
half4 diffuseMapSample = tex2D(_MainTex, IN.texCoord);
o.Albedo = diffuseMapSample.rgb;
o.Alpha = diffuseMapSample.a;
half4 lightmap = tex2D(_LightmapTex, IN.lightmapTexCoord);
o.Lightmap = DecodeLightmap(lightmap);
}
ENDCG
}
}
I would expect this code to generate pretty much the same or at least very similar shader to Mobile/Diffuse. What I got is this:
#ifdef VERTEX
#define gl_ModelViewProjectionMatrix glstate_matrix_mvp
uniform mat4 glstate_matrix_mvp;
varying lowp vec3 xlv_TEXCOORD3;
varying lowp vec3 xlv_TEXCOORD2;
varying highp vec2 xlv_TEXCOORD1;
varying highp vec2 xlv_TEXCOORD0;
uniform highp vec4 unity_Scale;
uniform highp mat4 _Object2World;
attribute vec4 _glesMultiTexCoord1;
attribute vec4 _glesMultiTexCoord0;
attribute vec3 _glesNormal;
attribute vec4 _glesVertex;
void main ()
{
lowp vec3 tmpvar_1;
mat3 tmpvar_2;
tmpvar_2[0] = _Object2World[0].xyz;
tmpvar_2[1] = _Object2World[1].xyz;
tmpvar_2[2] = _Object2World[2].xyz;
highp vec3 tmpvar_3;
tmpvar_3 = (tmpvar_2 * (normalize (_glesNormal) * unity_Scale.w));
tmpvar_1 = tmpvar_3;
gl_Position = (gl_ModelViewProjectionMatrix * _glesVertex);
xlv_TEXCOORD0 = _glesMultiTexCoord0.xy;
xlv_TEXCOORD1 = _glesMultiTexCoord1.xy;
xlv_TEXCOORD2 = tmpvar_1;
xlv_TEXCOORD3 = vec3(0.0, 0.0, 0.0);
}
#endif
#ifdef FRAGMENT
varying lowp vec3 xlv_TEXCOORD3;
varying highp vec2 xlv_TEXCOORD1;
varying highp vec2 xlv_TEXCOORD0;
uniform sampler2D _MainTex;
uniform sampler2D _LightmapTex;
void main ()
{
lowp vec4 c;
mediump vec4 lightmap;
mediump vec4 diffuseMapSample;
lowp vec4 tmpvar_1;
tmpvar_1 = texture2D (_MainTex, xlv_TEXCOORD0);
diffuseMapSample = tmpvar_1;
lowp vec4 tmpvar_2;
tmpvar_2 = texture2D (_LightmapTex, xlv_TEXCOORD1);
lightmap = tmpvar_2;
mediump vec4 c_i0;
c_i0.xyz = (diffuseMapSample.xyz * (2.0 * lightmap.xyz));
c_i0.w = diffuseMapSample.w;
c = c_i0;
mediump vec3 tmpvar_3;
tmpvar_3 = (c.xyz + (diffuseMapSample.xyz * xlv_TEXCOORD3));
c.xyz = tmpvar_3;
gl_FragData[0] = c;
}
#endif"
I suppose that all those temporary vars are not that relevant (the driver should optimize that, right?), but what the heck is xlv_TEXCOORD3 doing there? It’s a vec3 made of three 0s, passed as var from VS to PS, which is used somewhere at the end of PS to perform some stupid, completely irrelevant multiplication. Moreover, why is VS computing transformed normal vector if it’s not used anywhere?
Honestly, I would have not noticed that my shader has been cross-compiled in such a weird way if I had not tested it on my Tegra2-based device, where a frame’s time difference is somewhere around 5ms (Mobile/Diffuse is obviously faster). Sooo… am I doing something wrong or is Unity’s cross-compilation not that optimized?