Unity JOBS having the same performance as main thread

I made a mesh generator for later use in terrain but when i try to compare between the main thread and the jobs system i always get the same performance, its like im running always into the main thread.

is there something i’ve missed?

using Unity.Burst;
using UnityEngine;
using Unity.Collections;
using Unity.Jobs;
using UnityEngine.Rendering;
using Unity.Mathematics;

/// <summary>
/// Builds a flat grid mesh of <see cref="size"/> x <see cref="size"/> quads in the XZ plane,
/// either with <c>GetMeshJob</c> or with plain loops on the calling thread, so the two
/// approaches can be compared.
/// </summary>
public class MeshGenerator : MonoBehaviour
{
    /// <summary>Number of quads along each side of the grid. Must be at least 1.</summary>
    public int size;

    /// <summary>When true the mesh data is produced by <c>GetMeshJob</c>; otherwise by loops on the calling thread.</summary>
    public bool Jobs;

    /// <summary>
    /// Context-menu entry point: creates a new GameObject with a MeshFilter and MeshRenderer
    /// and fills it with the generated grid.
    /// </summary>
    [ContextMenu("Mesh")]
    void _Mesh()
    {
        // A grid needs at least one quad; size 0 divides by zero below and negative
        // sizes produce out-of-range indices.
        if (size < 1)
        {
            Debug.LogWarning("MeshGenerator: size must be at least 1.");
            return;
        }

        // Named meshObject so the local does not hide the inherited Component.gameObject.
        GameObject meshObject = new GameObject();
        Mesh mesh = new Mesh();

        MeshFilter meshFilter = meshObject.AddComponent<MeshFilter>();
        MeshRenderer meshRenderer = meshObject.AddComponent<MeshRenderer>();

        SetMeshData(mesh, meshFilter, meshRenderer);
    }

    /// <summary>
    /// Generates the grid geometry into <paramref name="mesh"/>, assigns it to
    /// <paramref name="meshFilter"/> and recalculates the normals.
    /// </summary>
    /// <param name="mesh">Mesh that receives the vertices and triangle indices.</param>
    /// <param name="meshFilter">Filter the finished mesh is assigned to.</param>
    /// <param name="meshRenderer">Renderer of the target object; not used by this method.</param>
    void SetMeshData(Mesh mesh, MeshFilter meshFilter, MeshRenderer meshRenderer)
    {
        // The grid always spans 64 units per side regardless of the quad count.
        float scale = 64.0f / (float)size;

        // 32-bit indices so grids with more than 65535 vertices can be addressed.
        mesh.indexFormat = IndexFormat.UInt32;

        if (Jobs)
        {
            print("Jobs");
            BuildWithJob(mesh, scale);
        }
        else
        {
            BuildOnMainThread(mesh, scale);
        }

        meshFilter.mesh = mesh;
        mesh.RecalculateNormals();
    }

    /// <summary>Fills <paramref name="mesh"/> using <c>GetMeshJob</c> and waits for it to finish.</summary>
    void BuildWithJob(Mesh mesh, float scale)
    {
        NativeArray<Vector3> vertices = new NativeArray<Vector3>((size + 1) * (size + 1), Allocator.TempJob);
        NativeArray<int> triangles = new NativeArray<int>(size * size * 6, Allocator.TempJob);

        try
        {
            GetMeshJob getMeshJob = new GetMeshJob()
            {
                vertices = vertices,
                triangles = triangles,
                size = size,
                scale = scale
            };

            // One iteration per quad. Complete() blocks the calling thread until the
            // job has finished, so the buffers are safe to read afterwards.
            JobHandle jobHandle = getMeshJob.Schedule(size * size, 10000);
            jobHandle.Complete();

            mesh.vertices = vertices.ToArray();
            mesh.triangles = triangles.ToArray();
        }
        finally
        {
            // Native memory is not garbage collected; release it even if an exception
            // was thrown above.
            vertices.Dispose();
            triangles.Dispose();
        }
    }

    /// <summary>Fills <paramref name="mesh"/> with plain loops on the calling thread.</summary>
    void BuildOnMainThread(Mesh mesh, float scale)
    {
        // Vertices are laid out row by row, so one row holds size + 1 vertices.
        int stride = size + 1;

        Vector3[] vertices = new Vector3[stride * stride];
        int[] triangles = new int[size * size * 6];

        for (int y = 0, index = 0; y <= size; y++)
        {
            for (int x = 0; x <= size; x++)
            {
                vertices[index] = new Vector3(x * scale, 0, y * scale);
                index++;
            }
        }

        int tris = 0;
        int verts = 0;

        for (int y = 0; y < size; y++)
        {
            for (int x = 0; x < size; x++)
            {
                // verts is the quad's lower-left vertex. The vertex directly above it is
                // one full row (stride) further on, not size further on.
                triangles[tris + 0] = verts;
                triangles[tris + 1] = verts + stride;
                triangles[tris + 2] = verts + stride + 1;
                triangles[tris + 3] = verts + stride + 1;
                triangles[tris + 4] = verts + 1;
                triangles[tris + 5] = verts;

                tris += 6;
                verts++;
            }

            // Skip the last vertex of the row; it is never a quad's lower-left corner.
            verts++;
        }

        mesh.vertices = vertices;
        mesh.triangles = triangles;
    }
}

/// <summary>
/// Parallel job that builds a flat grid of <see cref="size"/> x <see cref="size"/> quads.
/// Each iteration handles one quad: it writes the quad's lower-left vertex (plus the
/// border vertices when the quad lies on the last column or row) and the quad's six
/// triangle indices.
/// </summary>
[BurstCompile]
public struct GetMeshJob : IJobParallelFor
{
    // Each iteration writes to indices other than its own job index, so the
    // parallel-for range restriction is disabled on both arrays.

    /// <summary>Output vertices; must hold (size + 1) * (size + 1) elements, stored row by row.</summary>
    [NativeDisableParallelForRestriction] public NativeArray<Vector3> vertices;

    /// <summary>Output triangle indices; must hold size * size * 6 elements.</summary>
    [NativeDisableParallelForRestriction] public NativeArray<int> triangles;

    /// <summary>Distance between neighbouring vertices.</summary>
    public float scale;

    /// <summary>Number of quads along each side of the grid.</summary>
    public int size;

    /// <summary>Generates the quad with linear index <paramref name="threadIndex"/> (0 .. size * size - 1).</summary>
    public void Execute(int threadIndex)
    {
        int x = threadIndex % size;
        int y = threadIndex / size;

        // A vertex row holds size + 1 vertices.
        int stride = size + 1;
        int vert = threadIndex + y; // == y * stride + x
        int tri = threadIndex * 6;

        vertices[vert] = new Vector3(x * scale, 0, y * scale);

        // The job only runs once per quad, so the last vertex column and row would
        // never be written. The quads on the border fill them in; every vertex is
        // still written by exactly one iteration.
        bool lastColumn = x == size - 1;
        bool lastRow = y == size - 1;

        if (lastColumn)
        {
            vertices[vert + 1] = new Vector3((x + 1) * scale, 0, y * scale);
        }

        if (lastRow)
        {
            vertices[vert + stride] = new Vector3(x * scale, 0, (y + 1) * scale);
        }

        if (lastColumn && lastRow)
        {
            vertices[vert + stride + 1] = new Vector3((x + 1) * scale, 0, (y + 1) * scale);
        }

        // The vertex directly above vert is one full row (stride) further on.
        triangles[tri + 0] = vert;
        triangles[tri + 1] = vert + stride;
        triangles[tri + 2] = vert + stride + 1;
        triangles[tri + 3] = vert + stride + 1;
        triangles[tri + 4] = vert + 1;
        triangles[tri + 5] = vert;
    }
}

Because you schedule and immediately called Complete() on the job. Which means you just block the main thread waiting for the job to finish.

If this is an editor tool, you probably want to schedule it and use something like an editor co-routine to wait for it to be finished.

1 Like

How does it look like on profiler timeline? On first sight I could say you need to use float3 instead of vector on jobs and no need to convert native array to managed one when setting it to mesh just do MyMesh.SetVertices(MyNativeArray), also job batch size could be too big

Ok so i made the changes but i still have the same performance weather im using jobs or the main thread.

        // Output buffers for the job: one vertex per grid point and six indices per quad.
        NativeArray<float3> vertices = new NativeArray<float3>((size + 1) * (size + 1), Allocator.TempJob);
        NativeArray<int> triangles = new NativeArray<int>(size * size * 6, Allocator.TempJob);
       
        GetMeshJob getMeshJob = new GetMeshJob()
        {
            vertices = vertices,
            triangles = triangles,
            size = size,
            scale = scale
        };

        // One job iteration per quad, with an inner-loop batch count of 2048.
        JobHandle jobHandle = getMeshJob.Schedule(size * size, 2048);

        // Poll until the job reports completion instead of blocking in Complete().
        // NOTE(review): requires the enclosing method to be async (not visible in this
        // excerpt); the caller resumes before the mesh has been filled — confirm.
        while (!jobHandle.IsCompleted)
            await Task.Delay(1);
       
        // Still needed after IsCompleted: Complete() hands the native arrays back to the
        // main thread so they may be read below.
        jobHandle.Complete();

        mesh.SetVertices(vertices);
        mesh.triangles = triangles.ToArray();
        // NOTE(review): no Dispose() for vertices/triangles is visible in this excerpt —
        // confirm they are released afterwards, otherwise the TempJob allocations leak.
/// <summary>
/// Parallel job that builds a flat grid of <see cref="size"/> x <see cref="size"/> quads.
/// Each iteration handles one quad: it writes the quad's lower-left vertex (plus the
/// border vertices when the quad lies on the last column or row) and the quad's six
/// triangle indices.
/// </summary>
[BurstCompile]
public struct GetMeshJob : IJobParallelFor
{
    // Each iteration writes to indices other than its own job index, so the
    // parallel-for range restriction is disabled on both arrays.

    /// <summary>Output vertices; must hold (size + 1) * (size + 1) elements, stored row by row.</summary>
    [NativeDisableParallelForRestriction] public NativeArray<float3> vertices;

    /// <summary>Output triangle indices; must hold size * size * 6 elements.</summary>
    [NativeDisableParallelForRestriction] public NativeArray<int> triangles;

    /// <summary>Distance between neighbouring vertices.</summary>
    public float scale;

    /// <summary>Number of quads along each side of the grid.</summary>
    public int size;

    /// <summary>Generates the quad with linear index <paramref name="threadIndex"/> (0 .. size * size - 1).</summary>
    public void Execute(int threadIndex)
    {
        int x = threadIndex % size;
        int y = threadIndex / size;

        // A vertex row holds size + 1 vertices.
        int stride = size + 1;
        int vert = threadIndex + y; // == y * stride + x
        int tri = threadIndex * 6;

        vertices[vert] = new float3(x * scale, 0, y * scale);

        // The job only runs once per quad, so the last vertex column and row would
        // never be written. The quads on the border fill them in; every vertex is
        // still written by exactly one iteration.
        bool lastColumn = x == size - 1;
        bool lastRow = y == size - 1;

        if (lastColumn)
        {
            vertices[vert + 1] = new float3((x + 1) * scale, 0, y * scale);
        }

        if (lastRow)
        {
            vertices[vert + stride] = new float3(x * scale, 0, (y + 1) * scale);
        }

        if (lastColumn && lastRow)
        {
            vertices[vert + stride + 1] = new float3((x + 1) * scale, 0, (y + 1) * scale);
        }

        // The vertex directly above vert is one full row (stride) further on.
        triangles[tri + 0] = vert;
        triangles[tri + 1] = vert + stride;
        triangles[tri + 2] = vert + stride + 1;
        triangles[tri + 3] = vert + stride + 1;
        triangles[tri + 4] = vert + 1;
        triangles[tri + 5] = vert;
    }
}

I cant see anything related to job in that image, I think you posted random frame. May I ask how do you compare performance in first place? You could try run that method in every frame that would be easy to capture. Also there is Mesh.SetIndices() for setting triangles

Sorry i just not really familiar with the profiler but still cant seem to find any job related thing in it.

what i did to check the performance was this:

// Rough wall-clock measurement of a single SetMeshData call: sample the real time
// before the call and print the elapsed difference afterwards.
float frame = Time.realtimeSinceStartup;
           
SetMeshData(mesh, meshFilter, meshRenderer);
print(Time.realtimeSinceStartup - frame);


.

I think you dont need wait for a extra frame for your use case just forget about it. As we can see job itself successfully multithreaded and takes 41 ms while something before that takes 160 ms and you need to find it. It would be good that if you unfold the main thread view for me to see it :smile: You can still check 2 thing after that, firstly to make sure job is burst compiled make sure can be see on burst compiler and also make sure burst compilaton is turned on

alright i checked if burst compile is on and it appears it is. Here is the full script:

using System.Threading.Tasks;
using Unity.Burst;
using UnityEngine;
using Unity.Collections;
using Unity.Jobs;
using UnityEngine.Rendering;
using Unity.Mathematics;

/// <summary>
/// Generates a flat grid mesh of <see cref="size"/> x <see cref="size"/> quads with
/// <c>GetMeshJob</c> every frame and prints how long the generation took.
/// </summary>
public class MeshGenerator : MonoBehaviour
{
    /// <summary>Number of quads along each side of the grid. Must be at least 1.</summary>
    public int size;

    /// <summary>Not read by this version of the script; kept so existing serialized data stays valid.</summary>
    public bool Jobs;

    /// <summary>
    /// Builds a fresh GameObject and mesh each frame and prints the time spent in
    /// <see cref="SetMeshData"/>. The created objects are never destroyed, so this is
    /// only suitable for short measurement runs.
    /// </summary>
    void Update()
    {
        // A grid needs at least one quad; size 0 divides by zero below and negative
        // sizes produce out-of-range indices inside the job.
        if (size < 1)
        {
            Debug.LogWarning("MeshGenerator: size must be at least 1.");
            return;
        }

        // Named meshObject so the local does not hide the inherited Component.gameObject.
        GameObject meshObject = new GameObject();
        Mesh mesh = new Mesh();

        MeshFilter meshFilter = meshObject.AddComponent<MeshFilter>();
        MeshRenderer meshRenderer = meshObject.AddComponent<MeshRenderer>();

        float frame = Time.realtimeSinceStartup;

        SetMeshData(mesh, meshFilter, meshRenderer);

        print(Time.realtimeSinceStartup - frame);
    }

    /// <summary>
    /// Runs <c>GetMeshJob</c>, copies its output into <paramref name="mesh"/>, assigns the
    /// mesh to <paramref name="meshFilter"/> and recalculates normals, bounds and tangents.
    /// </summary>
    /// <param name="mesh">Mesh that receives the vertices and triangle indices.</param>
    /// <param name="meshFilter">Filter the finished mesh is assigned to.</param>
    /// <param name="meshRenderer">Renderer of the target object; not used by this method.</param>
    void SetMeshData(Mesh mesh, MeshFilter meshFilter, MeshRenderer meshRenderer)
    {
        // The grid always spans 64 units per side regardless of the quad count.
        float scale = 64.0f / (float)size;

        // 32-bit indices so grids with more than 65535 vertices can be addressed.
        mesh.indexFormat = IndexFormat.UInt32;

        NativeArray<float3> vertices = new NativeArray<float3>((size + 1) * (size + 1), Allocator.TempJob);
        NativeArray<int> triangles = new NativeArray<int>(size * size * 6, Allocator.TempJob);

        try
        {
            GetMeshJob getMeshJob = new GetMeshJob()
            {
                vertices = vertices,
                triangles = triangles,
                size = size,
                scale = scale
            };

            // One iteration per quad. Complete() blocks until the job has finished, so
            // the buffers are safe to read afterwards.
            JobHandle jobHandle = getMeshJob.Schedule(size * size, 1024);
            jobHandle.Complete();

            mesh.SetVertices(vertices);
            // calculateBounds is false here; bounds are recalculated explicitly below.
            mesh.SetIndices(triangles, MeshTopology.Triangles, 0, false, 0);
        }
        finally
        {
            // Native memory is not garbage collected; release it even if an exception
            // was thrown above.
            vertices.Dispose();
            triangles.Dispose();
        }

        meshFilter.mesh = mesh;

        mesh.RecalculateNormals();
        mesh.RecalculateBounds();
        mesh.RecalculateTangents();
    }
}

/// <summary>
/// Parallel job that builds a flat grid of <see cref="size"/> x <see cref="size"/> quads.
/// Each iteration handles one quad: it writes the quad's lower-left vertex (plus the
/// border vertices when the quad lies on the last column or row) and the quad's six
/// triangle indices.
/// </summary>
[BurstCompile]
public struct GetMeshJob : IJobParallelFor
{
    // Each iteration writes to indices other than its own job index, so the
    // parallel-for range restriction is disabled on both arrays.

    /// <summary>Output vertices; must hold (size + 1) * (size + 1) elements, stored row by row.</summary>
    [NativeDisableParallelForRestriction] public NativeArray<float3> vertices;

    /// <summary>Output triangle indices; must hold size * size * 6 elements.</summary>
    [NativeDisableParallelForRestriction] public NativeArray<int> triangles;

    /// <summary>Distance between neighbouring vertices.</summary>
    public float scale;

    /// <summary>Number of quads along each side of the grid.</summary>
    public int size;

    /// <summary>Generates the quad with linear index <paramref name="threadIndex"/> (0 .. size * size - 1).</summary>
    public void Execute(int threadIndex)
    {
        int x = threadIndex % size;
        int y = threadIndex / size;

        // A vertex row holds size + 1 vertices.
        int stride = size + 1;
        int vert = threadIndex + y; // == y * stride + x
        int tri = threadIndex * 6;

        vertices[vert] = new float3(x * scale, 0, y * scale);

        // The job only runs once per quad, so the last vertex column and row would
        // never be written. The quads on the border fill them in; every vertex is
        // still written by exactly one iteration.
        bool lastColumn = x == size - 1;
        bool lastRow = y == size - 1;

        if (lastColumn)
        {
            vertices[vert + 1] = new float3((x + 1) * scale, 0, y * scale);
        }

        if (lastRow)
        {
            vertices[vert + stride] = new float3(x * scale, 0, (y + 1) * scale);
        }

        if (lastColumn && lastRow)
        {
            vertices[vert + stride + 1] = new float3((x + 1) * scale, 0, (y + 1) * scale);
        }

        // The vertex directly above vert is one full row (stride) further on.
        triangles[tri + 0] = vert;
        triangles[tri + 1] = vert + stride;
        triangles[tri + 2] = vert + stride + 1;
        triangles[tri + 3] = vert + stride + 1;
        triangles[tri + 4] = vert + 1;
        triangles[tri + 5] = vert;
    }
}
1 Like

Okay, the issue calculation itself takes much less time to other methods, so making it super fast wont change overall speed much. I think you can see it on profiler with added markers easily

using System.Threading.Tasks;
using Unity.Burst;
using UnityEngine;
using Unity.Collections;
using Unity.Jobs;
using UnityEngine.Rendering;
using Unity.Mathematics;
using UnityEngine.Profiling;

/// <summary>
/// Generates a flat grid mesh of <see cref="size"/> x <see cref="size"/> quads with
/// <c>GetMeshJob</c> every frame, wrapping each stage in a profiler sample so the cost
/// of the individual steps shows up in the Unity Profiler.
/// </summary>
public class MeshGenerator : MonoBehaviour
{
    /// <summary>Number of quads along each side of the grid. Must be at least 1.</summary>
    public int size;

    /// <summary>Not read by this version of the script; kept so existing serialized data stays valid.</summary>
    public bool Jobs;

    /// <summary>
    /// Builds a fresh GameObject and mesh each frame and prints the time spent in
    /// <see cref="SetMeshData"/>. The created objects are never destroyed, so this is
    /// only suitable for short profiling runs.
    /// </summary>
    void Update()
    {
        // A grid needs at least one quad; size 0 divides by zero below and negative
        // sizes produce out-of-range indices inside the job.
        if (size < 1)
        {
            Debug.LogWarning("MeshGenerator: size must be at least 1.");
            return;
        }

        // Named meshObject so the local does not hide the inherited Component.gameObject.
        GameObject meshObject = new GameObject();
        Mesh mesh = new Mesh();
        MeshFilter meshFilter = meshObject.AddComponent<MeshFilter>();
        MeshRenderer meshRenderer = meshObject.AddComponent<MeshRenderer>();
        float frame = Time.realtimeSinceStartup;

        SetMeshData(mesh, meshFilter, meshRenderer);

        print(Time.realtimeSinceStartup - frame);
    }

    /// <summary>
    /// Runs <c>GetMeshJob</c>, copies its output into <paramref name="mesh"/>, assigns the
    /// mesh to <paramref name="meshFilter"/> and recalculates normals, bounds and tangents.
    /// </summary>
    /// <param name="mesh">Mesh that receives the vertices and triangle indices.</param>
    /// <param name="meshFilter">Filter the finished mesh is assigned to.</param>
    /// <param name="meshRenderer">Renderer of the target object; not used by this method.</param>
    void SetMeshData(Mesh mesh, MeshFilter meshFilter, MeshRenderer meshRenderer)
    {
        // The grid always spans 64 units per side regardless of the quad count.
        float scale = 64.0f / (float)size;

        // 32-bit indices so grids with more than 65535 vertices can be addressed.
        mesh.indexFormat = IndexFormat.UInt32;

        // UninitializedMemory skips clearing the buffers, so the job is responsible
        // for writing every element before the data is handed to the mesh.
        Profiler.BeginSample("Alloc");
        NativeArray<float3> vertices = new NativeArray<float3>((size + 1) * (size + 1), Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
        NativeArray<int> triangles = new NativeArray<int>(size * size * 6, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
        Profiler.EndSample();

        try
        {
            GetMeshJob getMeshJob = new GetMeshJob()
            {
                vertices = vertices,
                triangles = triangles,
                size = size,
                scale = scale
            };

            // One iteration per quad. Complete() blocks until the job has finished, so
            // the buffers are safe to read afterwards.
            JobHandle jobHandle = getMeshJob.Schedule(size * size, 1024);
            jobHandle.Complete();

            Profiler.BeginSample("SetMeshData");
            mesh.SetVertices(vertices);
            // calculateBounds is false here; bounds are recalculated explicitly below.
            mesh.SetIndices(triangles, MeshTopology.Triangles, 0, false, 0);
            Profiler.EndSample();
        }
        finally
        {
            // Native memory is not garbage collected; release it even if an exception
            // was thrown above.
            Profiler.BeginSample("DisposeNativeArrays");
            vertices.Dispose();
            triangles.Dispose();
            Profiler.EndSample();
        }

        Profiler.BeginSample("SetMesh");
        meshFilter.mesh = mesh;
        Profiler.EndSample();

        Profiler.BeginSample("RecalculateNormals");
        mesh.RecalculateNormals();
        Profiler.EndSample();

        Profiler.BeginSample("RecalculateBounds");
        mesh.RecalculateBounds();
        Profiler.EndSample();

        Profiler.BeginSample("RecalculateTangents");
        mesh.RecalculateTangents();
        Profiler.EndSample();
    }
}
/// <summary>
/// Parallel job that builds a flat grid of <see cref="size"/> x <see cref="size"/> quads.
/// Each iteration handles one quad: it writes the quad's lower-left vertex (plus the
/// border vertices when the quad lies on the last column or row) and the quad's six
/// triangle indices. Every element of both arrays is written, so the arrays may be
/// allocated without clearing.
/// </summary>
[BurstCompile]
public struct GetMeshJob : IJobParallelFor
{
    // Each iteration writes to indices other than its own job index, so the
    // parallel-for range restriction is disabled on both arrays.

    /// <summary>Output vertices; must hold (size + 1) * (size + 1) elements, stored row by row.</summary>
    [NativeDisableParallelForRestriction] public NativeArray<float3> vertices;

    /// <summary>Output triangle indices; must hold size * size * 6 elements.</summary>
    [NativeDisableParallelForRestriction] public NativeArray<int> triangles;

    /// <summary>Distance between neighbouring vertices.</summary>
    public float scale;

    /// <summary>Number of quads along each side of the grid.</summary>
    public int size;

    /// <summary>Generates the quad with linear index <paramref name="threadIndex"/> (0 .. size * size - 1).</summary>
    public void Execute(int threadIndex)
    {
        int x = threadIndex % size;
        int y = threadIndex / size;

        // A vertex row holds size + 1 vertices.
        int stride = size + 1;
        int vert = threadIndex + y; // == y * stride + x
        int tri = threadIndex * 6;

        vertices[vert] = new float3(x * scale, 0, y * scale);

        // The job only runs once per quad, so the last vertex column and row would
        // never be written (and would hold garbage in an uninitialized array). The
        // quads on the border fill them in; every vertex is still written by exactly
        // one iteration.
        bool lastColumn = x == size - 1;
        bool lastRow = y == size - 1;

        if (lastColumn)
        {
            vertices[vert + 1] = new float3((x + 1) * scale, 0, y * scale);
        }

        if (lastRow)
        {
            vertices[vert + stride] = new float3(x * scale, 0, (y + 1) * scale);
        }

        if (lastColumn && lastRow)
        {
            vertices[vert + stride + 1] = new float3((x + 1) * scale, 0, (y + 1) * scale);
        }

        // The vertex directly above vert is one full row (stride) further on.
        triangles[tri + 0] = vert;
        triangles[tri + 1] = vert + stride;
        triangles[tri + 2] = vert + stride + 1;
        triangles[tri + 3] = vert + stride + 1;
        triangles[tri + 4] = vert + 1;
        triangles[tri + 5] = vert;
    }
}

Also I made it little bit faster by unutilized arrays. In my computer whole process takes 70 ms, calculation itself takes 2.7 ms (the part we working on), setting mesh data takes 17ms, normal recalc takes 24ms and tangent takes 21 ms. So you successfully fix the wrong bottleneck, this is why every body learn profiling before optimization :smile:

Hmmm is that even valid in vanilla Unity? Kinda expecting this to still block the main thread.

Aside from that, you’re using the wrong API! This is classic single-tjhreaded Unity Mesh API so there’s very little you can jobify here.

You want to be using the Mesh.MeshData and MeshDataArray classes instead. Manual provides an example too.