Performance-wise cel/cartoon shading on mobile?

Recently I’ve been working on a 3D game designed for mobile devices. Until now I wasn’t very concerned with performance, but the time has finally come and, of course, things don’t look good. Since I’m using cartoon shading with a threshold lookup light (ramp) texture and an outlined silhouette, I was wondering: what is the most efficient toon shader to use on mobile?

It may help to post the shader code. Any tuning suggestions are appreciated, as I’m not a shader expert.

Shader "Custom/Outlined Diffuse Color" {
	// Material properties exposed to the inspector.
	Properties {
		// Tint multiplied with the _MainTex sample in surf().
		_Color ("Main Color", Color) = (.5,.5,.5,1)
		// Colour the outline vertex program writes into v2f.color.
		_OutlineColor ("Outline Color", Color) = (0,0,0,1)
		// Outline width slider, limited to the range .002 - 0.03.
		_Outline ("Outline width", Range (.002, 0.03)) = .003
		// Base texture sampled in surf(); defaults to white.
		_MainTex ("Base (RGB)", 2D) = "white" { }
		// Lookup texture sampled by the LightingRamp lighting function; defaults to gray.
		_Ramp ("Shading Ramp", 2D) = "gray" {}
	}
	
CGINCLUDE
#include "UnityCG.cginc"

// Per-vertex input consumed by the outline vertex program vert().
struct appdata {
	float4 vertex : POSITION;	// vertex position; vert() multiplies it by UNITY_MATRIX_MVP
	float3 normal : NORMAL;		// vertex normal; vert() uses it as the displacement direction
};

// Data passed from the outline vertex program vert() to the fragment program frag().
struct v2f {
	float4 pos : POSITION;	// projected (and displaced) vertex position
	float4 color : COLOR;	// outline colour, returned unchanged by frag()
};

// CG-side declarations matching the _Outline and _OutlineColor entries of the
// Properties block, so that the shader code below can reference them.
uniform float _Outline;
uniform float4 _OutlineColor;

// Outline vertex program.
//
// Projects the incoming vertex, then displaces the projected position along
// the projected vertex normal so the mesh is drawn slightly enlarged. The
// displacement is driven by the material's "Outline width" property
// (_Outline) instead of a hard-coded constant, so the inspector slider
// actually controls the outline thickness.
//
// v       - vertex position and normal (see appdata).
// returns - displaced position plus the outline colour (see v2f).
v2f vert(appdata v) {
	v2f o;
	o.pos = mul(UNITY_MATRIX_MVP, v.vertex);

	// Transform the normal with the inverse-transpose model-view matrix, then
	// project its xy components to obtain the displacement direction.
	float3 norm   = mul ((float3x3)UNITY_MATRIX_IT_MV, v.normal);
	float2 offset = TransformViewToProjection(norm.xy);

	// Scaling by o.pos.z makes the displacement grow with depth;
	// _Outline is the user-configurable width.
	o.pos.xy += offset * o.pos.z * _Outline;
	o.color = _OutlineColor;
	return o;
}
ENDCG

	SubShader {
		//Tags {"Queue" = "Geometry+100" }
CGPROGRAM
		// Surface shader declaration: the surface function is surf() and the
		// lighting model is "Ramp", implemented by LightingRamp() below.
		#pragma surface surf Ramp

	  	// Lookup texture sampled by LightingRamp(); matches the _Ramp property.
	  	sampler2D _Ramp;
	  	
		// Custom "Ramp" lighting model.
		//
		// Remaps N.L with *0.5 + 0.5 (i.e. [-1, 1] -> [0, 1] for unit vectors),
		// uses that value as both texture coordinates into _Ramp, and modulates
		// albedo and light colour by the fetched ramp colour.
		//
		// s        - surface data produced by surf() (Albedo, Normal, Alpha).
		// lightDir - direction towards the light.
		// atten    - light attenuation factor.
		// returns  - lit colour in rgb, surface alpha in a.
		half4 LightingRamp (SurfaceOutput s, half3 lightDir, half atten) {
			half rampCoord = dot (s.Normal, lightDir) * 0.5 + 0.5;
			half3 rampColor = tex2D (_Ramp, float2(rampCoord, rampCoord)).rgb;

			half4 lit;
			lit.a = s.Alpha;
			lit.rgb = s.Albedo * _LightColor0.rgb * rampColor * (atten * 2);
			return lit;
		}

// Base texture and tint; both match entries in the Properties block and are read by surf().
sampler2D _MainTex;
fixed4 _Color;

// Per-pixel input handed to surf().
struct Input {
	float2 uv_MainTex;	// texture coordinates used to sample _MainTex
};

// Surface function: samples the base texture, tints it with _Color and
// writes the result to the surface's albedo (rgb) and alpha (a).
void surf (Input IN, inout SurfaceOutput o) {
	fixed4 tinted = _Color * tex2D(_MainTex, IN.uv_MainTex);
	o.Alpha = tinted.a;
	o.Albedo = tinted.rgb;
}
ENDCG

		// note that a vertex shader is specified here but its using the one above
		Pass {
			Name "OUTLINE"
			Tags { "LightMode" = "Always" }

			// Render state: write depth, cull front faces, alpha-blend the
			// result and leave the destination alpha channel untouched.
			ZWrite On
			Cull Front
			Blend SrcAlpha OneMinusSrcAlpha
			ColorMask RGB
			//Offset 50,50

			CGPROGRAM
			#pragma vertex vert
			#pragma fragment frag

			// Flat fill: outputs the colour that vert() stored in v2f.color.
			half4 frag(v2f IN) : COLOR {
				half4 outlineColor = IN.color;
				return outlineColor;
			}
			ENDCG
		}
	}
	
	SubShader {
CGPROGRAM
// Fallback SubShader: same surface function surf(), but lit with the
// "Lambert" lighting model instead of the custom ramp.
#pragma surface surf Lambert

// Base texture and tint; both match entries in the Properties block and are read by surf().
sampler2D _MainTex;
fixed4 _Color;

// Per-pixel input handed to surf().
struct Input {
	float2 uv_MainTex;	// texture coordinates used to sample _MainTex
};

// Surface function of the fallback SubShader: samples the base texture,
// tints it with _Color and writes albedo (rgb) and alpha (a).
void surf (Input IN, inout SurfaceOutput o) {
	fixed4 tinted = _Color * tex2D(_MainTex, IN.uv_MainTex);
	o.Alpha = tinted.a;
	o.Albedo = tinted.rgb;
}
ENDCG

		// Outline pass of the fallback SubShader. Render state is the same as
		// in the first SubShader's OUTLINE pass.
		Pass {
			Name "OUTLINE"
			Tags { "LightMode" = "Always" }
			// Cull front faces, write depth, alpha-blend, and leave the
			// destination alpha channel untouched.
			Cull Front
			ZWrite On
			ColorMask RGB
			Blend SrcAlpha OneMinusSrcAlpha

			CGPROGRAM
			#pragma vertex vert
			#pragma fragment frag
			// Outputs the colour that vert() stored in v2f.color.
			half4 frag(v2f i) :COLOR { return i.color; }
			// Do not compile this program for the listed renderers.
			#pragma exclude_renderers gles xbox360 ps3
			ENDCG
			// NOTE(review): this pass declares both a fragment program and a
			// fixed-function SetTexture stage, and excludes gles (the usual
			// mobile renderer) - confirm which path is actually intended.
			SetTexture [_MainTex] { combine primary }
		}
	}
	
	Fallback "Diffuse"
}

I’d be tempted to convert those floats into halfs and make your sampler2D a sampler2D_half; this should improve things a little. If you are not using the alpha, then remove all references to it in the first pass. I’m not sure what is happening with the alpha in the outline pass, so you might still need it there.