Why is accessing via ref faster than pointer dereference?

Hi. I ran a performance comparison of several ways to access an array element.

I tested 7 methods:

  1. Using UnsafeUtility.ReadArrayElement()
  2. Using UnsafeUtility.ReadArrayElement() + inline
  3. Using UnsafeUtility.ArrayElementAsRef()
  4. Using UnsafeUtility.ArrayElementAsRef() + inline
  5. Using unsafe pointer dereference
  6. Using unsafe pointer dereference + inline
  7. Using unsafe pointer + C#'s ref keyword.

Full Test code:

using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.Runtime.InteropServices;
using Unity.Collections.LowLevel.Unsafe;
using Unity.VisualScripting;
using UnityEngine;

/// <summary>
/// Micro-benchmark that compares seven ways of reading field <c>A</c> from an
/// unmanaged array of <see cref="BigStruct"/>: <c>UnsafeUtility.ReadArrayElement</c>,
/// <c>UnsafeUtility.ArrayElementAsRef</c>, raw pointer dereference (each with and
/// without an intermediate local), and a pointer dereference bound to a <c>ref</c> local.
/// Results are written to the Unity console from <see cref="Start"/>.
/// </summary>
public class PerformanceTest : MonoBehaviour
{
    /// <summary>
    /// Allocates and fills the test buffer, runs every variant once as warm-up,
    /// then times each variant and logs its sum and elapsed time.
    /// </summary>
    private unsafe void Start()
    {
        var size = 5000;
        var loopCount = 5000;
        var buffer = UnsafeUtility.Malloc((long)UnsafeUtility.SizeOf<BigStruct>() * size, UnsafeUtility.AlignOf<BigStruct>(), Unity.Collections.Allocator.Temp);

        try
        {
            for (int i = 0; i < size; i++)
            {
                UnsafeUtility.WriteArrayElement<BigStruct>(buffer, i, new BigStruct { A = i });
            }

            var stopwatch = new System.Diagnostics.Stopwatch();

            // The expected total is loopCount * (0 + 1 + ... + (size - 1)) = 62,487,500,000
            // for the values above, which does not fit in an int; accumulate in a long
            // so the logged sum is the real one instead of a wrapped value.
            long sum;

            // Warmup
            TestByReadElement(buffer, size, loopCount);
            TestByReadElementInline(buffer, size, loopCount);
            TestByElementAsRef(buffer, size, loopCount);
            TestByElementAsRefInline(buffer, size, loopCount);
            TestByPtr(buffer, size, loopCount);
            TestByPtrInline(buffer, size, loopCount);
            TestByPtrRef(buffer, size, loopCount);

            // Test: the stopwatch is stopped before logging so that string formatting
            // is never part of the measured interval.
            stopwatch.Restart();
            sum = TestByReadElement(buffer, size, loopCount);
            stopwatch.Stop();
            LogResult(nameof(TestByReadElement), sum, stopwatch.Elapsed);

            stopwatch.Restart();
            sum = TestByReadElementInline(buffer, size, loopCount);
            stopwatch.Stop();
            LogResult(nameof(TestByReadElementInline), sum, stopwatch.Elapsed);

            stopwatch.Restart();
            sum = TestByElementAsRef(buffer, size, loopCount);
            stopwatch.Stop();
            LogResult(nameof(TestByElementAsRef), sum, stopwatch.Elapsed);

            stopwatch.Restart();
            sum = TestByElementAsRefInline(buffer, size, loopCount);
            stopwatch.Stop();
            LogResult(nameof(TestByElementAsRefInline), sum, stopwatch.Elapsed);

            stopwatch.Restart();
            sum = TestByPtr(buffer, size, loopCount);
            stopwatch.Stop();
            LogResult(nameof(TestByPtr), sum, stopwatch.Elapsed);

            stopwatch.Restart();
            sum = TestByPtrInline(buffer, size, loopCount);
            stopwatch.Stop();
            LogResult(nameof(TestByPtrInline), sum, stopwatch.Elapsed);

            stopwatch.Restart();
            sum = TestByPtrRef(buffer, size, loopCount);
            stopwatch.Stop();
            LogResult(nameof(TestByPtrRef), sum, stopwatch.Elapsed);
        }
        finally
        {
            // Pair the Malloc above with an explicit Free so the buffer is released
            // even if one of the tests throws.
            UnsafeUtility.Free(buffer, Unity.Collections.Allocator.Temp);
        }
    }

    /// <summary>
    /// Writes one benchmark line to the Unity console.
    /// </summary>
    /// <param name="name">Name of the measured method (left-aligned, padded to 32 characters).</param>
    /// <param name="sum">Sum returned by the measured method.</param>
    /// <param name="elapsed">Time the measured call took.</param>
    private static void LogResult(string name, long sum, TimeSpan elapsed)
    {
        Debug.Log($"{name,-32} : sum={sum}, time={elapsed.ToString("c")}");
    }

    /// <summary>
    /// Variant 1: copies each element out with <c>UnsafeUtility.ReadArrayElement</c>
    /// into a local, then reads <c>A</c> from the copy.
    /// </summary>
    /// <param name="buffer">Pointer to <paramref name="size"/> contiguous <see cref="BigStruct"/> values.</param>
    /// <param name="size">Number of elements to read per pass.</param>
    /// <param name="loopCount">Number of passes over the buffer.</param>
    /// <returns>Sum of <c>A</c> over all elements and all passes.</returns>
    private unsafe long TestByReadElement(void* buffer, int size, int loopCount)
    {
        long sum = 0;

        for (int loopIndex = 0; loopIndex < loopCount; loopIndex++)
        {
            for (int i = 0; i < size; i++)
            {
                var value = UnsafeUtility.ReadArrayElement<BigStruct>(buffer, i);
                sum += value.A;
            }
        }

        return sum;
    }

    /// <summary>
    /// Variant 2: same as <see cref="TestByReadElement"/> but reads <c>A</c> directly
    /// from the call result without an intermediate local.
    /// </summary>
    /// <param name="buffer">Pointer to <paramref name="size"/> contiguous <see cref="BigStruct"/> values.</param>
    /// <param name="size">Number of elements to read per pass.</param>
    /// <param name="loopCount">Number of passes over the buffer.</param>
    /// <returns>Sum of <c>A</c> over all elements and all passes.</returns>
    private unsafe long TestByReadElementInline(void* buffer, int size, int loopCount)
    {
        long sum = 0;

        for (int loopIndex = 0; loopIndex < loopCount; loopIndex++)
        {
            for (int i = 0; i < size; i++)
            {
                sum += UnsafeUtility.ReadArrayElement<BigStruct>(buffer, i).A;
            }
        }

        return sum;
    }

    /// <summary>
    /// Variant 3: binds each element to a <c>ref</c> local via
    /// <c>UnsafeUtility.ArrayElementAsRef</c>, then reads <c>A</c> through it.
    /// </summary>
    /// <param name="buffer">Pointer to <paramref name="size"/> contiguous <see cref="BigStruct"/> values.</param>
    /// <param name="size">Number of elements to read per pass.</param>
    /// <param name="loopCount">Number of passes over the buffer.</param>
    /// <returns>Sum of <c>A</c> over all elements and all passes.</returns>
    private unsafe long TestByElementAsRef(void* buffer, int size, int loopCount)
    {
        long sum = 0;

        for (int loopIndex = 0; loopIndex < loopCount; loopIndex++)
        {
            for (int i = 0; i < size; i++)
            {
                ref var value = ref UnsafeUtility.ArrayElementAsRef<BigStruct>(buffer, i);
                sum += value.A;
            }
        }

        return sum;
    }

    /// <summary>
    /// Variant 4: same as <see cref="TestByElementAsRef"/> but reads <c>A</c> directly
    /// from the returned reference without a <c>ref</c> local.
    /// </summary>
    /// <param name="buffer">Pointer to <paramref name="size"/> contiguous <see cref="BigStruct"/> values.</param>
    /// <param name="size">Number of elements to read per pass.</param>
    /// <param name="loopCount">Number of passes over the buffer.</param>
    /// <returns>Sum of <c>A</c> over all elements and all passes.</returns>
    private unsafe long TestByElementAsRefInline(void* buffer, int size, int loopCount)
    {
        long sum = 0;

        for (int loopIndex = 0; loopIndex < loopCount; loopIndex++)
        {
            for (int i = 0; i < size; i++)
            {
                sum += UnsafeUtility.ArrayElementAsRef<BigStruct>(buffer, i).A;
            }
        }

        return sum;
    }

    /// <summary>
    /// Variant 5: computes a <c>BigStruct*</c> to each element, stores it in a local,
    /// then reads <c>A</c> through the pointer.
    /// </summary>
    /// <param name="buffer">Pointer to <paramref name="size"/> contiguous <see cref="BigStruct"/> values.</param>
    /// <param name="size">Number of elements to read per pass.</param>
    /// <param name="loopCount">Number of passes over the buffer.</param>
    /// <returns>Sum of <c>A</c> over all elements and all passes.</returns>
    private unsafe long TestByPtr(void* buffer, int size, int loopCount)
    {
        long sum = 0;

        for (int loopIndex = 0; loopIndex < loopCount; loopIndex++)
        {
            for (int i = 0; i < size; i++)
            {
                var value = (((BigStruct*)buffer) + i);
                sum += value->A;
            }
        }

        return sum;
    }

    /// <summary>
    /// Variant 6: same as <see cref="TestByPtr"/> but dereferences the computed
    /// pointer in the same expression, without a pointer local.
    /// </summary>
    /// <param name="buffer">Pointer to <paramref name="size"/> contiguous <see cref="BigStruct"/> values.</param>
    /// <param name="size">Number of elements to read per pass.</param>
    /// <param name="loopCount">Number of passes over the buffer.</param>
    /// <returns>Sum of <c>A</c> over all elements and all passes.</returns>
    private unsafe long TestByPtrInline(void* buffer, int size, int loopCount)
    {
        long sum = 0;

        for (int loopIndex = 0; loopIndex < loopCount; loopIndex++)
        {
            for (int i = 0; i < size; i++)
            {
                sum += (((BigStruct*)buffer) + i)->A;
            }
        }

        return sum;
    }

    /// <summary>
    /// Variant 7: dereferences the computed pointer and binds the result to a
    /// <c>ref</c> local, then reads <c>A</c> through that reference.
    /// </summary>
    /// <param name="buffer">Pointer to <paramref name="size"/> contiguous <see cref="BigStruct"/> values.</param>
    /// <param name="size">Number of elements to read per pass.</param>
    /// <param name="loopCount">Number of passes over the buffer.</param>
    /// <returns>Sum of <c>A</c> over all elements and all passes.</returns>
    private unsafe long TestByPtrRef(void* buffer, int size, int loopCount)
    {
        long sum = 0;

        for (int loopIndex = 0; loopIndex < loopCount; loopIndex++)
        {
            for (int i = 0; i < size; i++)
            {
                ref var value = ref (*(((BigStruct*)buffer) + i));
                sum += value.A;
            }
        }

        return sum;
    }

    /// <summary>
    /// Element type used by the benchmark. Only <see cref="A"/> is ever read;
    /// the remaining fields exist to make the struct large.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    struct BigStruct
    {
        // The only field the tests read; initialized to the element index in Start.
        public int A;

        // Padding payload: never read or written by the tests.
        public long B0;
        public double B1;
        public double B2;
        public double B3;
        public double B4;
        public double B5;
        public double B6;
        public double B7;
        public double B8;
        public double B9;
    }
}

And here is the result in an IL2CPP Windows64 release build:

The result is surprising. Accessing via pointer + ref (method 7, TestByPtrRef) is far faster than all the others, including plain pointer dereference. But I can't figure out what makes this difference. Can you explain it?

Thanks.

Note: When I changed the execution order of the tests, the result was the same. Even without warm-up, TestByPtrRef is always the fastest.

Use Burst if you care about performance. Right now you are relying on the sequence of two less-aggressive compilers and any random thing could cause one of them to get tripped up and do something suboptimal.

1 Like

I re-tested with Burst and found that methods 1-7 show almost the same performance (though methods 1 and 2 are slightly faster). Of course, the Burst-compiled version was faster overall. I'll use Burst for performance-critical code paths. Thanks.

(Still, the non-Burst TestByPtrRef is far faster than the others; maybe the compiler is skipping some of the work?)

Interesting, but the sum value seems incorrect? Try getting the sum value correct, and maybe there will be a difference in results.

Tbh they are all about the same speed apart from the top 2. Looks tidier by ref like this. I personally love pointers because you can control a lot in a shorter space. And if you know how not to break memory with them, then you can avoid the slow down of safety checks. C# is a safe language, people don’t like you using pointers directly. But at the end of the day, speed is all about memory. If you don’t understand memory, don’t use pointers. I’m still trying to figure out cache hits though.

                ref BigStruct value = ref ((BigStruct*)buffer)[i];
                sum += value.A;

Also, there is pointer arithmetic, which works like this, but it doesn't prove to be much faster (the looping itself also takes time).

            for (int i = 0; i < size; i += 8)
            {
                BigStruct* ptr = &((BigStruct*)buffer)[i];
                sum += ptr++->A + ptr++->A + ptr++->A + ptr++->A + ptr++->A + ptr++->A + ptr++->A + ptr->A;