Why is my job running slowly the first few frames then speeding up?

I am trying to use the Unity job system to do a certain coding task quickly but what seems to be happening is that it takes a long time the first frame, then a medium amount of time for several more frames, then it goes lightning quick. The problem is that this coding task is something I will only call once at a time, not something that I will leave running for several frames. I am only leaving it running in the update loop for now just to see how fast it goes over several frames. It seems like it needs about a dozen frames to get warmed up before it will go lightning fast. How do I make the jobified code run quickly the first time it is called?

P.S. I have included a screenshot and my code in question. In my code, I have the jobified code along with a method called ControlTest() that does the task normally so I can compare the jobified code against it. The task I am trying to accomplish is to quickly convert all the local coordinates of a mesh to world coordinates. (But I have noticed this problem with the job system in other tasks as well). The screenshot shows the Profiler and the Console. The console is printing 2 things every frame: the time it takes the job to complete and the time it takes the ControlTest() to complete. As you can see, it isn’t till frame 9 that the job really speeds up. I need it to go that fast on frame 1.

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.Mathematics;
using Unity.Jobs;
using UnityEngine.Jobs;
using Unity.Collections;
using Unity.Burst;

public class JobTest : MonoBehaviour{
    [SerializeField] bool runTest;
    [SerializeField] int count;
    Mesh meshB;
    Matrix4x4 matrix;
    NativeArray<Vector3> verticesB;
    NativeArray<int> trianglesB; 
    NativeArray<float3> worldPointsB;
    Vector3[] verticesBArray;
    int[] trianglesBArray;
    float3[] worldPointsBArray;
    float[] worldPointsBFloatArray;
    Vector3 vTemp;
    float m00, m10, m20, m01, m11, m21, m02, m12, m22, m03, m13, m23, x, y, z, fx, fy, fz;
    int trisBPointCount;

    private void OnValidate() {
        meshB = GetComponent<MeshFilter>().sharedMesh;
        verticesBArray = meshB.vertices;
        trianglesBArray = meshB.triangles;
        worldPointsBArray = new float3[meshB.triangles.Length];
        matrix = transform.localToWorldMatrix;
        verticesB = new NativeArray<Vector3>(meshB.vertices, Allocator.TempJob);
        trianglesB = new NativeArray<int>(meshB.triangles, Allocator.TempJob);
        trisBPointCount = trianglesB.Length;
        worldPointsB = new NativeArray<float3>(trisBPointCount, Allocator.TempJob);
    }

    private void Update() {
        if (runTest) {            
            float startTime = Time.realtimeSinceStartup;
            for (int i = 0; i < count; i++) {                
                DoReallyToughParallelJob();                
            }
            print("job: " + ((Time.realtimeSinceStartup - startTime) * 1000) + "ms");
             
            startTime = Time.realtimeSinceStartup;
            for (int i = 0; i < count; i++) {
                ControlTest();
            }
            print("control: " + ((Time.realtimeSinceStartup - startTime) * 1000) + "ms");
        }
    }   
    
    
    private void ControlTest() {
        int trisBLength = trianglesBArray.Length;
        m00 = matrix[0]; m10 = matrix[1]; m20 = matrix[2]; m01 = matrix[4]; m11 = matrix[5]; m21 = matrix[6];
        m02 = matrix[8]; m12 = matrix[9]; m22 = matrix[10]; m03 = matrix[12]; m13 = matrix[13]; m23 = matrix[14];
        for (int index = 0; index < trisBLength; index++) {
            vTemp = verticesBArray[trianglesBArray[index]];
            x = vTemp.x; y = vTemp.y; z = vTemp.z;
            fx = m00 * x + m01 * y + m02 * z + m03;
            fy = m10 * x + m11 * y + m12 * z + m13;
            fz = m20 * x + m21 * y + m22 * z + m23;
            worldPointsBArray[index] = new float3(fx, fy, fz);            
        }            
    }

  private void DoReallyToughParallelJob() {                        
        m00 = matrix[0]; m10 = matrix[1]; m20 = matrix[2]; m01 = matrix[4]; m11 = matrix[5]; m21 = matrix[6];
        m02 = matrix[8]; m12 = matrix[9]; m22 = matrix[10]; m03 = matrix[12]; m13 = matrix[13]; m23 = matrix[14];
        ReallyToughParallelJob reallyToughParallelJob = new ReallyToughParallelJob {
            m00 = m00, m10 = m10, m20 = m20, m01 = m01, m11 = m11, m21 = m21,
            m02 = m02, m12 = m12, m22 = m22, m03 = m03, m13 = m13, m23 = m23,
            verticesB = verticesB,
            trianglesB = trianglesB,
            worldPointsB = worldPointsB
        };
        JobHandle jobHandle = reallyToughParallelJob.Schedule(trisBPointCount, 100);
        jobHandle.Complete();
        //worldPointsB.Dispose();
        //verticesB.Dispose();
        //trianglesB.Dispose();
    }
}


[BurstCompile]
public struct ReallyToughParallelJob : IJobParallelFor {    
    [ReadOnly] public float m00, m10, m20, m01, m11, m21, m02, m12, m22, m03, m13, m23;
    [ReadOnly] public NativeArray<Vector3> verticesB;
    [ReadOnly] public NativeArray<int> trianglesB;
    public NativeArray<float3> worldPointsB;
    public void Execute (int index) {
        Vector3 vTemp;
        float x, y, z, fx, fy, fz;
        vTemp = verticesB[trianglesB[index]];
        x = vTemp.x;
        y = vTemp.y;
        z = vTemp.z;
        fx = m00 * x + m01 * y + m02 * z + m03;
        fy = m10 * x + m11 * y + m12 * z + m13;
        fz = m20 * x + m21 * y + m22 * z + m23;
        worldPointsB[index] = new float3(fx, fy, fz);        
    }
}

It’s because burst is compiling just-in-time in the editor, and the mono version is running in the meantime. If you enable Jobs->Burst->Synchronous Compilation, it will then stall to wait for Burst to finish compiling, and never run the mono version.

You can also enable this on a per-job basis with [BurstCompile(CompileSynchronously = true)].

See here for more info: Burst User Guide | Burst | 1.2.3