Cg to GLSL - really that optimized?

Unity’s documentation states that Cg to GLSL cross-compilation is well optimized. Unfortunately, I’ve just experienced something that led me to question that.

There is a built-in (I guess) collection of mobile shaders. I used one of them, called Mobile/Diffuse, for simple lightmapped geometry with a diffuse map. The shader has very simple GLSL ES code:

#ifdef VERTEX
#define gl_ModelViewProjectionMatrix glstate_matrix_mvp
uniform mat4 glstate_matrix_mvp;

varying highp vec2 xlv_TEXCOORD1;
varying highp vec2 xlv_TEXCOORD0;
uniform highp vec4 unity_LightmapST;

uniform highp vec4 _MainTex_ST;
attribute vec4 _glesMultiTexCoord1;
attribute vec4 _glesMultiTexCoord0;
attribute vec4 _glesVertex;
// Vertex stage of the compiled Mobile/Diffuse shader: transforms the vertex by
// the model-view-projection matrix and emits two UV sets.
void main ()
{
  gl_Position = (gl_ModelViewProjectionMatrix * _glesVertex);
  // Each UV set is multiplied by the .xy and offset by the .zw of its own
  // *_ST uniform (_MainTex_ST for UV0, unity_LightmapST for UV1).
  xlv_TEXCOORD0 = ((_glesMultiTexCoord0.xy * _MainTex_ST.xy) + _MainTex_ST.zw);
  xlv_TEXCOORD1 = ((_glesMultiTexCoord1.xy * unity_LightmapST.xy) + unity_LightmapST.zw);
}



#endif
#ifdef FRAGMENT

varying highp vec2 xlv_TEXCOORD1;
varying highp vec2 xlv_TEXCOORD0;
uniform sampler2D unity_Lightmap;
uniform sampler2D _MainTex;
// Fragment stage of the compiled Mobile/Diffuse shader: two texture fetches and
// one multiply, all in lowp.
void main ()
{
  lowp vec4 c;
  lowp vec4 tmpvar_1;
  tmpvar_1 = texture2D (_MainTex, xlv_TEXCOORD0);
  // Zero-initialisation; both c.xyz and c.w are overwritten below.
  c = vec4(0.0, 0.0, 0.0, 0.0);
  // RGB = diffuse sample * (2.0 * lightmap sample).
  c.xyz = (tmpvar_1.xyz * (2.0 * texture2D (unity_Lightmap, xlv_TEXCOORD1).xyz));
  // Alpha is taken from the diffuse sample unchanged.
  c.w = tmpvar_1.w;
  gl_FragData[0] = c;
}



#endif"

Nothing peculiar here - sampling a texture, a lightmap, and mul of the two.

To have more flexibility I decided to write my own shader similar to that one, that just samples the diffuse and lightmap and combines them. Here’s my shader:

// Surface shader that multiplies a diffuse texture by a lightmap texture which is
// supplied as an ordinary material property (_LightmapTex).
Shader "Custom/Enviro/Solid/Lightmap"
{
	// Two 2D texture properties, both defaulting to "white".
	Properties
	{
		_MainTex ("Base (RGB) Trans (A)", 2D) = "white" {}
		_LightmapTex ("Lightmap", 2D) = "white" {} 
	}
	
	
	
	SubShader
	{
		Tags { "Queue" = "Geometry+1" }

		CGPROGRAM
		
		// Surface-shader directive: surf is the surface function, Custom selects the
		// lighting function LightingCustom below, and vert is the vertex function.
		// The remaining words are optional surface-shader parameters.
		#pragma surface surf Custom vertex:vert nolightmap nodirlightmap noambient novertexlights



		sampler2D _MainTex;
		sampler2D _LightmapTex;
		
		
		
		// Surface output passed from surf to LightingCustom. Of these fields only
		// Albedo, Alpha and Lightmap are written by surf and read by LightingCustom.
		struct CustomSurfaceOutput
		{
			half3 Albedo;
			half3 Normal;
			half3 Emission;
			half Specular;
			half Alpha;
			half3 Lightmap;
		};		
		
		
		
		// Lighting function: returns Albedo * Lightmap in rgb and Alpha in a.
		// lightDir and atten are accepted but not used.
		half4 LightingCustom (CustomSurfaceOutput s, half3 lightDir, half atten)
		{
			half4 c;
			
			c.rgb = s.Albedo * s.Lightmap;
			c.a = s.Alpha;
		
			return c;
		}
		
		
		
		// Per-vertex data handed to surf: one UV set per texture.
		struct Input
		{
			float2 texCoord;
			float2 lightmapTexCoord;
		};
		
		
		
		// Vertex function: copies the first and second UV sets straight through;
		// no scale or offset is applied to either.
		void vert(inout appdata_full v, out Input o)
		{ 
			o.texCoord = v.texcoord.xy;
			o.lightmapTexCoord = v.texcoord1.xy;
		}
		
		
		
		// Surface function: samples the diffuse map into Albedo/Alpha and the
		// lightmap texture into Lightmap.
		void surf(Input IN, inout CustomSurfaceOutput o)
		{
			half4 diffuseMapSample = tex2D(_MainTex, IN.texCoord);
		
			o.Albedo = diffuseMapSample.rgb; 
			o.Alpha = diffuseMapSample.a;
		 
			half4 lightmap = tex2D(_LightmapTex, IN.lightmapTexCoord); 
			// DecodeLightmap is not defined in this file.
			// NOTE(review): presumably provided by Unity's built-in CG includes — confirm.
			o.Lightmap = DecodeLightmap(lightmap);
		}

		
		
		ENDCG
	}
}

I would expect this code to generate pretty much the same or at least very similar shader to Mobile/Diffuse. What I got is this:

#ifdef VERTEX
#define gl_ModelViewProjectionMatrix glstate_matrix_mvp
uniform mat4 glstate_matrix_mvp;

varying lowp vec3 xlv_TEXCOORD3;
varying lowp vec3 xlv_TEXCOORD2;
varying highp vec2 xlv_TEXCOORD1;
varying highp vec2 xlv_TEXCOORD0;
uniform highp vec4 unity_Scale;

uniform highp mat4 _Object2World;
attribute vec4 _glesMultiTexCoord1;
attribute vec4 _glesMultiTexCoord0;
attribute vec3 _glesNormal;
attribute vec4 _glesVertex;
// Generated vertex stage: besides position and the two pass-through UV sets it
// also computes a transformed normal and emits a constant-zero varying.
void main ()
{
  lowp vec3 tmpvar_1;
  // 3x3 matrix built from the xyz of the first three columns of _Object2World.
  mat3 tmpvar_2;
  tmpvar_2[0] = _Object2World[0].xyz;
  tmpvar_2[1] = _Object2World[1].xyz;
  tmpvar_2[2] = _Object2World[2].xyz;
  // Normalized vertex normal, scaled by unity_Scale.w, transformed by that matrix;
  // computed in highp and then stored into a lowp temporary.
  highp vec3 tmpvar_3;
  tmpvar_3 = (tmpvar_2 * (normalize (_glesNormal) * unity_Scale.w));
  tmpvar_1 = tmpvar_3;
  gl_Position = (gl_ModelViewProjectionMatrix * _glesVertex);
  // UVs are copied unchanged.
  xlv_TEXCOORD0 = _glesMultiTexCoord0.xy;
  xlv_TEXCOORD1 = _glesMultiTexCoord1.xy;
  // The transformed normal; the fragment stage of this program does not declare
  // xlv_TEXCOORD2.
  xlv_TEXCOORD2 = tmpvar_1;
  // Constant zero vector passed to the fragment stage.
  xlv_TEXCOORD3 = vec3(0.0, 0.0, 0.0);
}



#endif
#ifdef FRAGMENT

varying lowp vec3 xlv_TEXCOORD3;
varying highp vec2 xlv_TEXCOORD1;
varying highp vec2 xlv_TEXCOORD0;
uniform sampler2D _MainTex;
uniform sampler2D _LightmapTex;
// Generated fragment stage: diffuse * (2.0 * lightmap), followed by an extra add
// of diffuse * xlv_TEXCOORD3.
void main ()
{
  lowp vec4 c;
  mediump vec4 lightmap;
  mediump vec4 diffuseMapSample;
  // Both texture samples arrive as lowp and are copied into mediump temporaries.
  lowp vec4 tmpvar_1;
  tmpvar_1 = texture2D (_MainTex, xlv_TEXCOORD0);
  diffuseMapSample = tmpvar_1;
  lowp vec4 tmpvar_2;
  tmpvar_2 = texture2D (_LightmapTex, xlv_TEXCOORD1);
  lightmap = tmpvar_2;
  // Lighting result is computed in mediump, then stored back into lowp c.
  mediump vec4 c_i0;
  c_i0.xyz = (diffuseMapSample.xyz * (2.0 * lightmap.xyz));
  c_i0.w = diffuseMapSample.w;
  c = c_i0;
  // xlv_TEXCOORD3 is written as vec3(0.0, 0.0, 0.0) by the vertex stage of this
  // program, so the added term is diffuse * 0.
  mediump vec3 tmpvar_3;
  tmpvar_3 = (c.xyz + (diffuseMapSample.xyz * xlv_TEXCOORD3));
  c.xyz = tmpvar_3;
  gl_FragData[0] = c;
}



#endif"

I suppose that all those temporary vars are not that relevant (the driver should optimize that, right?), but what the heck is xlv_TEXCOORD3 doing there? It’s a vec3 made of three 0s, passed as var from VS to PS, which is used somewhere at the end of PS to perform some stupid, completely irrelevant multiplication. Moreover, why is VS computing transformed normal vector if it’s not used anywhere?

Honestly, I would not have noticed that my shader had been cross-compiled in such a weird way if I had not tested it on my Tegra2-based device, where the frame-time difference is somewhere around 5ms (Mobile/Diffuse is obviously faster). Sooo… am I doing something wrong or is Unity’s cross-compilation not that optimized?

Doesn’t look optimised. You should poke Aras to see what he thinks as he did the optimiser.

Just sent him a PM

“Well optimized” is obviously different from “always perfectly optimized”.
Also, Unity allows you to write perfectly optimized shaders in GLSL if you are up to it. :wink:

I’d love to, but can I use surface shader construct with GLSL shaders? :slight_smile:

No, but you can write GLSL shaders directly; for some tutorials see: https://en.wikibooks.org/wiki/GLSL_Programming/Unity

Great link. Thanks a lot for it. But… I’m now reading Aras’s comments on GLSL (http://forum.unity3d.com/threads/5142-Why-Cg-over-GLSL). I’ve already heard complaints about GLSL and it’s sort of risky to now switch to it from Cg when I have most of my material library written. I think I’ll wait a bit for Aras’s feedback on this.

Have fun with Cg then. :wink:

Hi Maxest,

I can see how it looks as if the cross compiling might be going astray, but there are a few things to consider in your examples.

You have a few mid-precision variables/vectors that the compiler won’t optimize down to fixed or lowp variables/vectors. This can have a huge impact on performance in GLES because the hardware won’t be able to process the data in a vector format.

This is my personal opinion, but I would avoid using surface-shader-like structures and pragmas in such a simple shader. It’s just adding another level of abstraction between what you write and what gets compiled. I’d say this is why you are getting oddball additions to your shader.

I’ve found the cross-compilation to be very effective so far. However, I will write the shader in Cg with GLES in mind. Avoiding the following at the Cg level can only help the compiler.

  1. Avoid high-precision and mid-precision variables in the fragment shader.
  2. Avoid swizzling of lowp vectors such as color.bgra; however, in my tests masking is fine, e.g. color.a or color.rgb.
  3. Avoid dependent texture fetches (basically, UVs that don’t come directly from the vertex shader or are swizzled).
  4. Mask any operation you can, e.g. max(colorA,colorB).rgb
  5. And an easy one to miss: vectorize your code! fixed3 colorC = (colorA.rgb * colorB.rgb) * (colorA.a * 2); By forcing the compiler to process scalars and vectors separately it will cut down on processing for exactly the same result.

I hope this helps. I’ll do up an example of your shader to demonstrate how it helps tonight.

Kind regards
Bruno

Great points bruno, 3 thumbs up :slight_smile:

Thanks brn for the tips. I’ve just changed all half to fixed, and the generated code now is:

#ifdef VERTEX
#define gl_ModelViewProjectionMatrix glstate_matrix_mvp
uniform mat4 glstate_matrix_mvp;

varying lowp vec3 xlv_TEXCOORD3;
varying lowp vec3 xlv_TEXCOORD2;
varying highp vec2 xlv_TEXCOORD1;
varying highp vec2 xlv_TEXCOORD0;
uniform highp vec4 unity_Scale;

uniform highp mat4 _Object2World;
attribute vec4 _glesMultiTexCoord1;
attribute vec4 _glesMultiTexCoord0;
attribute vec3 _glesNormal;
attribute vec4 _glesVertex;
// Generated vertex stage after the half -> fixed change: it still computes a
// transformed normal and emits a constant-zero varying alongside position and UVs.
void main ()
{
  lowp vec3 tmpvar_1;
  // 3x3 matrix built from the xyz of the first three columns of _Object2World.
  mat3 tmpvar_2;
  tmpvar_2[0] = _Object2World[0].xyz;
  tmpvar_2[1] = _Object2World[1].xyz;
  tmpvar_2[2] = _Object2World[2].xyz;
  // Normalized normal scaled by unity_Scale.w and transformed by that matrix;
  // computed in highp, stored into a lowp temporary.
  highp vec3 tmpvar_3;
  tmpvar_3 = (tmpvar_2 * (normalize (_glesNormal) * unity_Scale.w));
  tmpvar_1 = tmpvar_3;
  gl_Position = (gl_ModelViewProjectionMatrix * _glesVertex);
  // UVs are copied unchanged.
  xlv_TEXCOORD0 = _glesMultiTexCoord0.xy;
  xlv_TEXCOORD1 = _glesMultiTexCoord1.xy;
  // Not declared by the fragment stage of this program.
  xlv_TEXCOORD2 = tmpvar_1;
  // Constant zero vector passed to the fragment stage.
  xlv_TEXCOORD3 = vec3(0.0, 0.0, 0.0);
}



#endif
#ifdef FRAGMENT

varying lowp vec3 xlv_TEXCOORD3;
varying highp vec2 xlv_TEXCOORD1;
varying highp vec2 xlv_TEXCOORD0;
uniform sampler2D _MainTex;
uniform sampler2D _LightmapTex;
// Generated fragment stage after the half -> fixed change: every temporary is
// lowp, but the extra add of diffuse * xlv_TEXCOORD3 is still present.
void main ()
{
  lowp vec4 c;
  lowp vec4 tmpvar_1;
  tmpvar_1 = texture2D (_MainTex, xlv_TEXCOORD0);
  lowp vec4 c_i0;
  // RGB = diffuse sample * (2.0 * lightmap sample); alpha from the diffuse sample.
  c_i0.xyz = (tmpvar_1.xyz * (2.0 * texture2D (_LightmapTex, xlv_TEXCOORD1).xyz));
  c_i0.w = tmpvar_1.w;
  c = c_i0;
  // xlv_TEXCOORD3 is written as vec3(0.0, 0.0, 0.0) by the vertex stage of this
  // program, so the added term is diffuse * 0.
  c.xyz = (c_i0.xyz + (tmpvar_1.xyz * xlv_TEXCOORD3));
  gl_FragData[0] = c;
}



#endif"

So the float conversions are gone, but still some excessive code is generated…

BTW: I cannot manage to run any GLSL shader; I constantly get “No subshaders can run on this graphics card” error
EDIT: I guess the error is caused by the fact that I’m running Unity on Windows, so it’s running under D3D and, surprisingly, there is no way to switch the renderer in the editor…

EDIT2: I’ve just noticed that the excessive shader lines, both vertex and fragment, are generated when I pass #pragma nolightmap. So basically a plain diffuse, non-lightmapped shader is longer than the same lightmapped shader.

For the following shader:

// Reduced test shader: a surface shader whose lighting function returns the
// diffuse texture sample unchanged.
Shader "Custom/TEST2"
{
    // Two 2D texture properties, both defaulting to "white".
    Properties
    {
        _MainTex ("Base (RGB) Trans (A)", 2D) = "white" {}
        _LightmapTex ("Lightmap", 2D) = "white" {}
    }
   
   
   
    SubShader
    {
        Tags { "Queue" = "Geometry+1" }
 
        CGPROGRAM
       
        // Surface-shader directive: surf is the surface function, Custom selects the
        // lighting function LightingCustom below, and vert is the vertex function.
        // The remaining words are optional surface-shader parameters.
        #pragma surface surf Custom vertex:vert nodirlightmap noambient novertexlights
 
        sampler2D _MainTex;
        // Declared but never sampled anywhere in this shader.
        sampler2D _LightmapTex;
              
        // Surface output passed from surf to LightingCustom; only Albedo and Alpha
        // are written by surf and read by LightingCustom.
        struct CustomSurfaceOutput
        {
            fixed3 Albedo;
            fixed3 Normal;
            fixed3 Emission;
            fixed Specular;
            fixed Alpha;
        };               
            
        // Per-vertex data handed to surf: a single UV set.
        struct Input
        {
            float2 texCoord;
        };   

        // Vertex function: copies the first UV set straight through.
        void vert(inout appdata_full v, out Input o)
        {
            o.texCoord = v.texcoord.xy;
        }

        // Lighting function: returns Albedo in rgb and Alpha in a.
        // lightDir and atten are accepted but not used.
        fixed4 LightingCustom (CustomSurfaceOutput s, half3 lightDir, half atten)
        {
            fixed4 c;
           
            c.rgb = s.Albedo;
            c.a = s.Alpha;
       
            return c;
        }

        // Surface function: samples the diffuse map into Albedo and Alpha.
        void surf(Input IN, inout CustomSurfaceOutput o)
        {
            fixed4 diffuseMapSample = tex2D(_MainTex, IN.texCoord);
       
            o.Albedo = diffuseMapSample.rgb;
            o.Alpha = diffuseMapSample.a;
        }

        ENDCG
    }
}

The following programs are generated.

non-lightmapped:

SubProgram "gles " {
Keywords { "DIRECTIONAL" "LIGHTMAP_OFF" "DIRLIGHTMAP_OFF" "SHADOWS_OFF" }
"!!GLES
#define SHADER_API_GLES 1
#define tex2D texture2D


#ifdef VERTEX
#define gl_ModelViewProjectionMatrix glstate_matrix_mvp
uniform mat4 glstate_matrix_mvp;

varying lowp vec3 xlv_TEXCOORD2;
varying lowp vec3 xlv_TEXCOORD1;
varying highp vec2 xlv_TEXCOORD0;
uniform highp vec4 unity_Scale;

uniform highp mat4 _Object2World;
attribute vec4 _glesMultiTexCoord0;
attribute vec3 _glesNormal;
attribute vec4 _glesVertex;
// Generated vertex stage of the LIGHTMAP_OFF variant: position, one pass-through
// UV set, a transformed normal and a constant-zero varying.
void main ()
{
  lowp vec3 tmpvar_1;
  // 3x3 matrix built from the xyz of the first three columns of _Object2World.
  mat3 tmpvar_2;
  tmpvar_2[0] = _Object2World[0].xyz;
  tmpvar_2[1] = _Object2World[1].xyz;
  tmpvar_2[2] = _Object2World[2].xyz;
  // Normalized normal scaled by unity_Scale.w and transformed by that matrix;
  // computed in highp, stored into a lowp temporary.
  highp vec3 tmpvar_3;
  tmpvar_3 = (tmpvar_2 * (normalize (_glesNormal) * unity_Scale.w));
  tmpvar_1 = tmpvar_3;
  gl_Position = (gl_ModelViewProjectionMatrix * _glesVertex);
  xlv_TEXCOORD0 = _glesMultiTexCoord0.xy;
  // Not declared by the fragment stage of this program.
  xlv_TEXCOORD1 = tmpvar_1;
  // Constant zero vector passed to the fragment stage.
  xlv_TEXCOORD2 = vec3(0.0, 0.0, 0.0);
}



#endif
#ifdef FRAGMENT

varying lowp vec3 xlv_TEXCOORD2;
varying highp vec2 xlv_TEXCOORD0;
uniform sampler2D _MainTex;
// Generated fragment stage of the LIGHTMAP_OFF variant: outputs the diffuse
// sample, plus an extra add of diffuse * xlv_TEXCOORD2.
void main ()
{
  lowp vec4 c;
  lowp vec4 tmpvar_1;
  tmpvar_1 = texture2D (_MainTex, xlv_TEXCOORD0);
  // Component-wise copy of the diffuse sample.
  lowp vec4 c_i0;
  c_i0.xyz = tmpvar_1.xyz;
  c_i0.w = tmpvar_1.w;
  c = c_i0;
  // xlv_TEXCOORD2 is written as vec3(0.0, 0.0, 0.0) by the vertex stage of this
  // program, so the added term is diffuse * 0.
  c.xyz = (tmpvar_1.xyz + (tmpvar_1.xyz * xlv_TEXCOORD2));
  gl_FragData[0] = c;
}



#endif"
}

lightmapped:

SubProgram "gles " {
Keywords { "DIRECTIONAL" "LIGHTMAP_ON" "DIRLIGHTMAP_OFF" "SHADOWS_OFF" }
"!!GLES
#define SHADER_API_GLES 1
#define tex2D texture2D


#ifdef VERTEX
#define gl_ModelViewProjectionMatrix glstate_matrix_mvp
uniform mat4 glstate_matrix_mvp;

varying highp vec2 xlv_TEXCOORD1;
varying highp vec2 xlv_TEXCOORD0;
uniform highp vec4 unity_LightmapST;

attribute vec4 _glesMultiTexCoord1;
attribute vec4 _glesMultiTexCoord0;
attribute vec4 _glesVertex;
// Generated vertex stage of the LIGHTMAP_ON variant: position and two UV sets
// only; no normal is computed and no constant varying is emitted.
void main ()
{
  gl_Position = (gl_ModelViewProjectionMatrix * _glesVertex);
  // UV0 is copied unchanged.
  xlv_TEXCOORD0 = _glesMultiTexCoord0.xy;
  // UV1 is multiplied by unity_LightmapST.xy and offset by unity_LightmapST.zw.
  xlv_TEXCOORD1 = ((_glesMultiTexCoord1.xy * unity_LightmapST.xy) + unity_LightmapST.zw);
}



#endif
#ifdef FRAGMENT

varying highp vec2 xlv_TEXCOORD1;
varying highp vec2 xlv_TEXCOORD0;
uniform sampler2D unity_Lightmap;
uniform sampler2D _MainTex;
// Generated fragment stage of the LIGHTMAP_ON variant: two texture fetches and
// one multiply, all in lowp.
void main ()
{
  lowp vec4 c;
  lowp vec4 tmpvar_1;
  tmpvar_1 = texture2D (_MainTex, xlv_TEXCOORD0);
  // Zero-initialisation; both c.xyz and c.w are overwritten below.
  c = vec4(0.0, 0.0, 0.0, 0.0);
  // RGB = diffuse sample * (2.0 * unity_Lightmap sample).
  c.xyz = (tmpvar_1.xyz * (2.0 * texture2D (unity_Lightmap, xlv_TEXCOORD1).xyz));
  // Alpha is taken from the diffuse sample unchanged.
  c.w = tmpvar_1.w;
  gl_FragData[0] = c;
}



#endif"
}

Why is the non-lightmapped longer?

“C:\Program Files\Unity\Editor\Unity.exe” -force-opengl

Right… I read somewhere that it was possible to force a built application to run under OGL, but not the editor. Thanks for the info

And you normally wouldn’t want to force it on Windows either: not the full set of Unity features is supported, and I would expect -force-opengl to be removed with the next release, as the only reason for it to be present was texture access from outside, which has been possible through DirectX since Unity 3.5.

So I see Unity is not very friendly to OGL under Windows… :slight_smile:
I just wish that Cg->GLSL cross-compiler didn’t generate these excessive instructions. Maybe I should add it to the wishlist