Performance tips

Some script changes that might help you write more performant code.

Speed comparison:
8441204--1118813--upload_2022-9-15_10-31-54.png

Code:

using System.Collections.Generic;
using UnityEngine;
using UnityEngine.Profiling;
using System.Linq;

/// <summary>
/// Runs a series of micro-benchmarks once, from <c>Start</c>. Every variant executes
/// <c>count</c> iterations inside its own named Profiler sample so the variants can be
/// compared side by side in the profiler. The leading number of a sample name identifies
/// the comparison group the sample belongs to.
/// </summary>
public class Benchmark : MonoBehaviour
{
    // Number of iterations performed by every measured loop.
    private int count = 1_000_000;

    // Non-empty string inspected by the group 3 empty-string checks.
    private string var1 = "abcE";

    // List read by group 4 and appended to by group 5 (never reset between measurements).
    private List<int> var3;

    // Five-element array that group 5 appends to var3.
    private int[] var4;

    // Start is called before the first frame update
    private void Start()
    {
        var3 = Enumerable.Range(0, 1000).ToList();
        var4 = Enumerable.Range(0, 5).ToArray();

        MeasureTransformWrites();
        MeasureComponentLookups();
        MeasureEmptyStringChecks();
        MeasureListCount();
        MeasureListAppends();

        System.GC.Collect();
    }

    /// <summary>
    /// Group 1: writes a random position and rotation to the transform, first through the two
    /// separate world-space properties, then through SetPositionAndRotation, then through the
    /// two local-space properties.
    /// </summary>
    private void MeasureTransformWrites()
    {
        Profiler.BeginSample("1 SetTransformDouble");
        for (int pass = 0; pass < count; pass++)
        {
            Vector3 worldPosition = new Vector3(Random.Range(-100, 100), Random.Range(-100, 100), Random.Range(-100, 100));
            transform.position = worldPosition;
            transform.rotation = Random.rotation;
        }
        Profiler.EndSample();

        Profiler.BeginSample("1 SetTransformCombined");
        for (int pass = 0; pass < count; pass++)
        {
            Vector3 worldPosition = new Vector3(Random.Range(-100, 100), Random.Range(-100, 100), Random.Range(-100, 100));
            transform.SetPositionAndRotation(worldPosition, Random.rotation);
        }
        Profiler.EndSample();

        Profiler.BeginSample("1 DoubleTransformLocal");
        for (int pass = 0; pass < count; pass++)
        {
            Vector3 localPosition = new Vector3(Random.Range(-100, 100), Random.Range(-100, 100), Random.Range(-100, 100));
            transform.localPosition = localPosition;
            transform.localRotation = Random.rotation;
        }
        Profiler.EndSample();
    }

    /// <summary>
    /// Group 2: compares TryGetComponent with GetComponent followed by a null check. The
    /// "False" samples run first; an AudioSource is then added to this GameObject and the
    /// "True" samples repeat the same lookups.
    /// </summary>
    private void MeasureComponentLookups()
    {
        AudioSource source;
        bool found = false; // written by the loops only so the lookup result is consumed

        Profiler.BeginSample("2 tryGetComponentFalse");
        for (int pass = 0; pass < count; pass++)
        {
            if (TryGetComponent(out source))
            {
                found = true;
            }
        }
        Profiler.EndSample();

        Profiler.BeginSample("2 GetComponentFalse");
        for (int pass = 0; pass < count; pass++)
        {
            source = GetComponent<AudioSource>();
            if (source != null)
            {
                found = true;
            }
        }
        Profiler.EndSample();

        // From here on the lookups are measured with the component present.
        gameObject.AddComponent<AudioSource>();

        Profiler.BeginSample("2 tryGetTrue");
        for (int pass = 0; pass < count; pass++)
        {
            if (TryGetComponent(out source))
            {
                found = true;
            }
        }
        Profiler.EndSample();

        Profiler.BeginSample("2 GetComponentTrue");
        for (int pass = 0; pass < count; pass++)
        {
            source = GetComponent<AudioSource>();
            if (source != null)
            {
                found = true;
            }
        }
        Profiler.EndSample();
    }

    /// <summary>
    /// Group 3: six ways of asking whether var1 is empty. The if bodies are intentionally
    /// empty; only the evaluation of the condition is being measured.
    /// </summary>
    private void MeasureEmptyStringChecks()
    {
        Profiler.BeginSample("3 string: ==");
        for (int pass = 0; pass < count; pass++)
        {
            if (var1 == string.Empty) { }
        }
        Profiler.EndSample();

        Profiler.BeginSample("3 string: == blank");
        for (int pass = 0; pass < count; pass++)
        {
            if (var1 == "") { }
        }
        Profiler.EndSample();

        Profiler.BeginSample("3 string .isnullorempty");
        for (int pass = 0; pass < count; pass++)
        {
            if (string.IsNullOrEmpty(var1)) { }
        }
        Profiler.EndSample();

        Profiler.BeginSample("3 string .length null check");
        for (int pass = 0; pass < count; pass++)
        {
            if (var1 != null && var1.Length == 0) { }
        }
        Profiler.EndSample();

        Profiler.BeginSample("3 string ?.lenght");
        for (int pass = 0; pass < count; pass++)
        {
            if (var1?.Length == 0) { }
        }
        Profiler.EndSample();

        Profiler.BeginSample("3 string .lenght");
        for (int pass = 0; pass < count; pass++)
        {
            if (var1.Length == 0) { }
        }
        Profiler.EndSample();
    }

    /// <summary>
    /// Group 4: reads the size of var3 through the List Count property and through the LINQ
    /// Count() extension method.
    /// </summary>
    private void MeasureListCount()
    {
        Profiler.BeginSample("4 native count");
        for (int pass = 0; pass < count; pass++)
        {
            // Deliberately not called "count", so it cannot be confused with the field
            // that bounds the loop.
            int size = var3.Count;
        }
        Profiler.EndSample();

        Profiler.BeginSample("4 LINQ count");
        for (int pass = 0; pass < count; pass++)
        {
            int size = var3.Count();
        }
        Profiler.EndSample();
    }

    /// <summary>
    /// Group 5: appends var4 to var3 with a single AddRange call and with one Add call per
    /// element. var3 is not reset in between, so the second sample starts from the list the
    /// first sample has already grown.
    /// </summary>
    private void MeasureListAppends()
    {
        Profiler.BeginSample("5 AddRange");
        for (int pass = 0; pass < count; pass++)
        {
            var3.AddRange(var4);
        }
        Profiler.EndSample();

        Profiler.BeginSample("5 Add looped");
        for (int pass = 0; pass < count; pass++)
        {
            for (int index = 0; index < var4.Length; index++)
            {
                var3.Add(var4[index]);
            }
        }
        Profiler.EndSample();
    }
}

Feel free to add your own tips or discuss in the replies.

2 Likes

I consider your results for Add looped vs AddRange highly flawed. You are reusing the var3 list not only between iterations but also between the measurements of the different approaches. The cost of List.Add or List.AddRange depends heavily on how many elements the list already holds and how they were added. For a comparison like this it also matters how many elements you are adding, since in theory AddRange has the advantage of knowing how many elements are about to be added, while Add has to guess whether you are going to repeat Add a few hundred times right away or not call it again for a long time. There is also potential for a difference because a built-in function may copy a whole range of memory directly instead of naively looping over the elements in code.

Dynamic array structures like System.Collections.Generic.List are typically implemented by keeping an amount of reserved memory that may be larger than the size reported by Count. Sometimes you can even query it through the Capacity property. That way, when you call Add and the list has unused capacity, it only needs to increase Count and copy the new element, without any memory allocation. If the list implementation didn’t do this, each Add would require a memory allocation and a copy of all the existing elements every time you add one more. Only when you try to Add a new element and there is no unused capacity does the list have to be reallocated and the existing elements moved/copied. To reduce the number of memory allocations and the number of times existing items need to be moved, many list implementations increase the capacity based on the current size: the bigger the list currently is, the more extra capacity it reserves when it needs more. This is easiest to understand with a multiplier of 2, but the math of the geometric progression works similarly with any multiplier > 1. The exact constant varies between implementations, as there is a tradeoff between fewer reallocations and more memory overhead consumed by unused capacity.

Example assuming multiplier 2 (mono used by Unity might have different constant):

// Illustrative trace for a hypothetical list that starts with capacity 0 and doubles its
// capacity whenever it runs out of room; a real implementation may use other constants.
var x = new List<int>(); // count = 0, capacity = 0
x.Add(1); // allocate,      count = 1,  capacity = 1
x.Add(1); // allocate,      count = 2,  capacity = 2
x.Add(1); // allocate,      count = 3,  capacity = 4
x.Add(1); // no allocation, count = 4,  capacity = 4
x.Add(1); // allocate,      count = 5,  capacity = 8
x.Add(1); // no allocation, count = 6,  capacity = 8
x.Add(1); // no allocation, count = 7,  capacity = 8
x.Add(1); // no allocation, count = 8,  capacity = 8
x.Add(1); // allocate,      count = 9,  capacity = 16
x.Add(1); // no allocation, count = 10, capacity = 16
x.Add(1); // no allocation, count = 11, capacity = 16
x.Add(1); // no allocation, count = 12, capacity = 16
x.Add(1); // no allocation, count = 13, capacity = 16

Due to the way the sum of a geometric progression works, the total number of item copy operations needed to build a list of size N will be about 2N, so on average each element is copied 2 times. If the capacity multiplier were smaller than 2, the average number of copies would be slightly higher. On the other hand, if the list increased its size only by one or by some fixed count, the total number of copy operations would be on the order of N^2.

The whole capacity strategy also means that inserting elements after x = new List<int>() versus after x.Clear() can make a huge performance difference. The docs for List.Clear say that it reduces Count to 0 but leaves Capacity unchanged. This means that inserting new elements into a list that was previously large and then emptied with Clear can be much faster than if the list was emptied by creating a new empty list. If you are trying to compare the performance of different insertion strategies, you might not be able to observe the differences caused by different reallocation behavior if the list was emptied with Clear() and still has capacity left over from previous runs.

So here is my modified version for benchmarking Add vs AddRange:

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.Profiling;
using System.Linq;
/// <summary>
/// Compares List.AddRange with a loop of List.Add calls under three usage patterns and two
/// source sizes (5 and 103 elements). Each variant runs inside its own named Profiler sample
/// and starts from a freshly created list, so no variant inherits capacity from the previous one.
/// </summary>
public class Benchmark : MonoBehaviour
{
    // Number of outer iterations per measured variant (one hundred thousand).
    int count = 100_000;

    // List under test; replaced by a new empty list at the start of every sample.
    List<int> var3;

    // Small (5-element) source array.
    int[] var4;

    /// <summary>
    /// Runs all benchmark variants once. Nothing in this class calls it; it is presumably
    /// invoked from elsewhere (verify against the caller).
    /// </summary>
    /// <remarks>
    /// The variants are intentionally written out one by one instead of being routed through a
    /// shared helper or delegate, so that no extra call overhead ends up inside the samples.
    /// </remarks>
    public void Go()
    {
        var4 = Enumerable.Range(0, 5).ToArray();

        // --- 5 elements, list keeps growing across iterations ---
        UnityEngine.Profiling.Profiler.BeginSample("5 AddRange");
        var3 = new List<int>();
        for (int i = 0; i < count; i++)
        {
            var3.AddRange(var4);
        }
        UnityEngine.Profiling.Profiler.EndSample();

        UnityEngine.Profiling.Profiler.BeginSample("5 Add looped");
        var3 = new List<int>();
        for (int i = 0; i < count; i++)
        {
            for (int j = 0; j < var4.Length; j++)
            {
                var3.Add(var4[j]);
            }
        }
        UnityEngine.Profiling.Profiler.EndSample();

        // --- 5 elements, list emptied with Clear() before every insertion ---
        UnityEngine.Profiling.Profiler.BeginSample("5 AddRange clear");
        var3 = new List<int>();
        for (int i = 0; i < count; i++)
        {
            var3.Clear();
            var3.AddRange(var4);
        }
        UnityEngine.Profiling.Profiler.EndSample();

        UnityEngine.Profiling.Profiler.BeginSample("5 Add looped clear");
        var3 = new List<int>();
        for (int i = 0; i < count; i++)
        {
            var3.Clear();
            for (int j = 0; j < var4.Length; j++)
            {
                var3.Add(var4[j]);
            }
        }
        UnityEngine.Profiling.Profiler.EndSample();

        // --- 5 elements, brand-new list before every insertion ---
        UnityEngine.Profiling.Profiler.BeginSample("5 AddRange new empty");
        var3 = new List<int>();
        for (int i = 0; i < count; i++)
        {
            var3 = new List<int>();
            var3.AddRange(var4);
        }
        UnityEngine.Profiling.Profiler.EndSample();

        UnityEngine.Profiling.Profiler.BeginSample("5 Add looped new empty");
        var3 = new List<int>();
        for (int i = 0; i < count; i++)
        {
            var3 = new List<int>();
            for (int j = 0; j < var4.Length; j++)
            {
                var3.Add(var4[j]);
            }
        }
        UnityEngine.Profiling.Profiler.EndSample();

        // Larger (103-element) source array for the second half of the comparison.
        var var400 = Enumerable.Range(0, 103).ToArray();

        // --- 103 elements, list keeps growing across iterations ---
        UnityEngine.Profiling.Profiler.BeginSample("103 AddRange");
        var3 = new List<int>();
        for (int i = 0; i < count; i++)
        {
            var3.AddRange(var400);
        }
        UnityEngine.Profiling.Profiler.EndSample();

        UnityEngine.Profiling.Profiler.BeginSample("103 Add looped");
        var3 = new List<int>();
        for (int i = 0; i < count; i++)
        {
            for (int j = 0; j < var400.Length; j++)
            {
                var3.Add(var400[j]);
            }
        }
        UnityEngine.Profiling.Profiler.EndSample();

        // --- 103 elements, list emptied with Clear() before every insertion ---
        UnityEngine.Profiling.Profiler.BeginSample("103 AddRange clear");
        var3 = new List<int>();
        for (int i = 0; i < count; i++)
        {
            var3.Clear();
            var3.AddRange(var400);
        }
        UnityEngine.Profiling.Profiler.EndSample();

        UnityEngine.Profiling.Profiler.BeginSample("103 Add looped clear");
        var3 = new List<int>();
        for (int i = 0; i < count; i++)
        {
            var3.Clear();
            for (int j = 0; j < var400.Length; j++)
            {
                var3.Add(var400[j]);
            }
        }
        UnityEngine.Profiling.Profiler.EndSample();

        // --- 103 elements, brand-new list before every insertion ---
        UnityEngine.Profiling.Profiler.BeginSample("103 AddRange new");
        var3 = new List<int>();
        for (int i = 0; i < count; i++)
        {
            var3 = new List<int>();
            var3.AddRange(var400);
        }
        UnityEngine.Profiling.Profiler.EndSample();

        UnityEngine.Profiling.Profiler.BeginSample("103 Add looped new");
        var3 = new List<int>();
        for (int i = 0; i < count; i++)
        {
            var3 = new List<int>();
            for (int j = 0; j < var400.Length; j++)
            {
                var3.Add(var400[j]);
            }
        }
        UnityEngine.Profiling.Profiler.EndSample();

        System.GC.Collect();
    }
}


The main takeaway from this shouldn’t be whether Add or AddRange is better, but that things can be a lot more complex than simply “X is better than Y”. Don’t trust a random post on the internet from N years ago claiming that doing X is faster than Y, even if it is backed by some benchmark results. Things might be different with different usage patterns, or maybe you are using a newer Unity with a newer, more optimized C# runtime, or maybe you are targeting a mobile platform which uses il2cpp instead of mono.
Make your own benchmarks, and do it with data and usage patterns that represent your use case. What’s “better” might depend on factors that you can’t imagine, so an artificial microbenchmark that doesn’t properly take them into account may give the impression that a change will make things 3 times faster when in reality it makes them 9 times slower.

One more interesting aspect to watch out for while benchmarking is the cost of the profiling process itself. I’m not sure if it’s enabled by default or whether I had it enabled from previous profiling, but Unity can record the call stack whenever a garbage-collectable allocation is performed. Recording a call stack is a costly operation, especially in microbenchmarks like these. I observed up to 20%-50% of the time being consumed by the profiler recording call stacks instead of the stuff you are trying to profile. At that point you are not measuring how long the code runs, but how many times it causes the profiler to record a call stack. It had a large impact on the order of the results for the different test cases.

2 Likes

Amazing reply! Will definitely take this with me for future work!