Mathematics long4 type

Just wondering why there isn’t longn or ulongn types in Unity.Mathematics? Should I be using double types and converting?

I created an issue on the github repo regarding long and ulong vector types a few months ago.
https://github.com/Unity-Technologies/Unity.Mathematics/issues/69

Unfortunately I never got an answer :frowning:
I am really wondering why even double is supported but not 64-bit integer numbers. The benefits are obvious and I don’t see any technical reason that would explain why it is not supported yet.

That’s a shame. I’m getting a lot out of the auto-vectorisation of int4, but need a long4 equivalent. I don’t want to lose all those perf gains!

64-bit integer numbers are supported as scalars (long/ulong), just like double. What is not supported is a vector version of either double or 64-bit integers (double4/long4/ulong4).

Maybe the technical reason is that uint4, int4 and float4 all add up to 32 bits × 4 = 128 bits, so each one fits in a single 128-bit xmm (SSE) register, or a similarly sized register on other architectures, which lets them ensure wide support. long4 / double4 would be a special case needing more work: 256-bit registers on the architectures that have them, plus a fallback routine that divides the work across smaller registers on devices without 256-bit instructions?

There is a double4. Am I misunderstanding something?

Good point. I know very little about this, but it does seem like AVX2 has been around since 2013/2015 for Intel/AMD. But I suppose there would still need to be support for non-AVX2 processors.

I was going by the Burst documentation. It seems like double4 may be burstable in the future, and you can just use it now.

Ah I see, thanks 5argon.

Still a bit of a mystery why double4 is supported but long4 isn’t, even as a non-vector type.

It may be supported in the future. What kinds of operations are you looking to perform with longN types?

I’d like SIMD support for decoding multiple 64 bit Morton numbers. https://github.com/johnsietsma/InfPoints/blob/master/com.infpoints.octree/Runtime/Morton.cs

Thanks for the example.

I saw your comment in that code sample on DecodeMorton64 which says: /// Burst will not auto-vectorise 64 bit types.

Do you have the assembly for that we can look at?

Ah, I wasn’t very clear in that comment. I meant there is no long4 type, so no packed assembly instructions. The Burst docs don’t list doublen as a vector type, so there is no 64-bit vectorisation.

I’ve added jobs and tests for the 64bit morton encoding to the project.

I’ve previously measured the 32-bit perf and looked at its assembly (Morton Order - Burst | John Sietsma). Having a uint4 version of the encoding and decoding functions doubles performance; I was hoping to do the same with long4.

Assembly for the 64-bit encode job (Morton64EncodeJob, as named in the listing) is below:

        .text
        .intel_syntax noprefix
        .file   "main"
        # Burst-generated entry point for Morton64EncodeJob (see the symbol name).
        # For every index i in [0, count) it loads three 32-bit coordinates from
        # the input buffer, validates them, spreads the bits of each coordinate
        # so that two zero bits separate consecutive source bits, and stores
        #     spread(c0) + 2*spread(c1) + 4*spread(c2)
        # as one 64-bit value in the output buffer. All work is done in scalar
        # 64-bit registers; SSE is used only for the load and the range check.
        #
        # Register roles inside the loop:
        #   rsi = pointer taken from rcx on entry (presumably the `data` job
        #         struct, first argument under the Win64 convention - confirm)
        #   r14 = element index i
        #   r13 = byte offset into the input buffer (12 bytes per element)
        #   rbx = address of the burst_abort function-pointer slot
        #   r12 = 0x1249249249249249, rbp = 0x10C30C30C30C30C3 (spreading masks)
        .globl  "Unity.Jobs.IJobExtensions.JobStruct`1<InfPoints.Octree.Jobs.Morton64EncodeJob>.Execute(ref InfPoints.Octree.Jobs.Morton64EncodeJob data, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_A91CEEBEFAC9E263"
        .p2align        4, 0x90
        .type   "Unity.Jobs.IJobExtensions.JobStruct`1<InfPoints.Octree.Jobs.Morton64EncodeJob>.Execute(ref InfPoints.Octree.Jobs.Morton64EncodeJob data, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_A91CEEBEFAC9E263",@function
"Unity.Jobs.IJobExtensions.JobStruct`1<InfPoints.Octree.Jobs.Morton64EncodeJob>.Execute(ref InfPoints.Octree.Jobs.Morton64EncodeJob data, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_A91CEEBEFAC9E263":
        .cfi_startproc
        # Prologue: save every general-purpose register this function modifies
        # and is expected to preserve, then reserve 72 bytes of stack.
        push    r15
        .cfi_def_cfa_offset 16
        push    r14
        .cfi_def_cfa_offset 24
        push    r13
        .cfi_def_cfa_offset 32
        push    r12
        .cfi_def_cfa_offset 40
        push    rsi
        .cfi_def_cfa_offset 48
        push    rdi
        .cfi_def_cfa_offset 56
        push    rbp
        .cfi_def_cfa_offset 64
        push    rbx
        .cfi_def_cfa_offset 72
        sub     rsp, 72
        # xmm6 is used below to hold the loaded coordinates; save the caller's value.
        movdqa  xmmword ptr [rsp + 48], xmm6
        .cfi_def_cfa_offset 144
        .cfi_offset rbx, -72
        .cfi_offset rbp, -64
        .cfi_offset rdi, -56
        .cfi_offset rsi, -48
        .cfi_offset r12, -40
        .cfi_offset r13, -32
        .cfi_offset r14, -24
        .cfi_offset r15, -16
        .cfi_offset xmm6, -96
        # Keep the first argument in rsi for the whole function.
        mov     rsi, rcx
        # [rsi + 8] is the loop count; skip everything when it is <= 0.
        mov     eax, dword ptr [rsi + 8]
        test    eax, eax
        jle     .LBB0_24
        # Loop-invariant setup: zero the byte offset and index, cache the abort
        # pointer slot and two of the bit-spreading masks.
        xor     r13d, r13d
        movabs  rbx, offset .Lburst_abort_Ptr
        # r12 = 0x1249249249249249 (every third bit set, bits 0..60)
        movabs  r12, 1317624576693539401
        xor     r14d, r14d
        # rbp = 0x10C30C30C30C30C3
        movabs  rbp, 1207822528635744451
        .p2align        4, 0x90
.LBB0_2:
        # ---- Input index check: i must satisfy [rsi+12] <= i <= [rsi+16] ----
        # (presumably the safety-check min/max index of the input array - confirm)
        movsxd  rcx, dword ptr [rsi + 12]
        cmp     r14, rcx
        jl      .LBB0_4
        movsxd  rdx, dword ptr [rsi + 16]
        cmp     r14, rdx
        jle     .LBB0_9
.LBB0_4:
        # Index rejected. Pick the error text: if the lower bound is 0 and the
        # upper bound equals count - 1 (eax holds the value read from [rsi+8]),
        # report "out of range of Length" (id.1 / message.2); otherwise report
        # the restricted IJobParallelFor range error (id.3 / message.4).
        test    ecx, ecx
        jne     .LBB0_7
        dec     eax
        cmp     dword ptr [rsi + 16], eax
        jne     .LBB0_7
        movabs  rcx, offset .Lburst_abort.error.id.1
        movabs  rdx, offset .Lburst_abort.error.message.2
        jmp     .LBB0_8
        .p2align        4, 0x90
.LBB0_7:
        movabs  rcx, offset .Lburst_abort.error.id.3
        movabs  rdx, offset .Lburst_abort.error.message.4
.LBB0_8:
        # Call through the slot that burst.initialize fills with "burst_abort";
        # rcx = exception type name, rdx = message.
        call    qword ptr [rbx]
.LBB0_9:
        # ---- Read safety check ----
        # Compare the dword at [rsi+32] with the dword pointed to by [rsi+24]
        # after clearing bits 1 and 2 (mask -7). On mismatch, copy the 16 bytes
        # { [rsi+24], [rsi+32], [rsi+36] } to the stack and pass their address to
        # AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut.
        mov     rax, qword ptr [rsi + 24]
        mov     ecx, dword ptr [rsi + 32]
        mov     edx, dword ptr [rax]
        and     edx, -7
        cmp     ecx, edx
        je      .LBB0_11
        mov     edx, dword ptr [rsi + 36]
        mov     qword ptr [rsp + 32], rax
        mov     dword ptr [rsp + 40], ecx
        mov     dword ptr [rsp + 44], edx
        lea     rcx, [rsp + 32]
        movabs  rax, offset ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected_Ptr"
        call    qword ptr [rax]
.LBB0_11:
        # ---- Load the element: 12 bytes = three 32-bit lanes ----
        # [rsi] is the input buffer base; movq loads lanes 0-1, pinsrd lane 2.
        mov     rax, qword ptr [rsi]
        movq    xmm6, qword ptr [rax + r13]
        pinsrd  xmm6, dword ptr [rax + r13 + 8], 2
        # ---- Range check: unsigned max of the three lanes must be < 2^21 ----
        # Shuffle 36 yields lanes (0,1,2,0) so the unused 4th lane cannot win
        # the max; the two shuffle+pmaxud pairs then reduce it into lane 0.
        pshufd  xmm0, xmm6, 36
        pshufd  xmm1, xmm0, 78
        pmaxud  xmm1, xmm0
        pshufd  xmm0, xmm1, 229
        pmaxud  xmm0, xmm1
        movd    eax, xmm0
        # 2097152 = 2^21
        cmp     eax, 2097152
        jb      .LBB0_13
        # A coordinate is too large: raise the OverflowException text.
        movabs  rcx, offset .Lburst_abort.error.id
        movabs  rdx, offset .Lburst_abort.error.message
        call    qword ptr [rbx]
.LBB0_13:
        # ---- Bit spreading, done three times in scalar 64-bit registers ----
        # Each sequence computes, for x = lane & 0x3FFFFF:
        #   x = (x | x<<16 | x<<32) & 0x001F0000FF0000FF   (r8)
        #   x = (x | x<<8)          & 0x100F00F00F00F00F   (rbx, temporarily)
        #   x = (x | x<<4)          & 0x10C30C30C30C30C3   (rbp)
        #   x = (x | x<<2)          & 0x1249249249249249   (r12)
        #
        # Lane 2 -> rdi
        pextrd  eax, xmm6, 2
        and     eax, 4194303
        mov     rcx, rax
        shl     rcx, 32
        or      rcx, rax
        shl     rax, 16
        or      rax, rcx
        # r8 = 0x001F0000FF0000FF (rematerialised every iteration)
        movabs  r8, 8725728556220671
        and     rax, r8
        mov     rcx, rax
        shl     rcx, 8
        or      rcx, rax
        # rbx is borrowed for the 0x100F00F00F00F00F mask; the abort pointer slot
        # address is parked in r9 and restored further down.
        mov     r9, rbx
        movabs  rbx, 1157144660301377551
        and     rcx, rbx
        mov     rax, rcx
        shl     rax, 4
        or      rax, rcx
        and     rax, rbp
        # lea with scale 4 is the x<<2 step.
        lea     rdi, [4*rax]
        or      rdi, rax
        and     rdi, r12
        # Lane 1 -> rax
        pextrd  eax, xmm6, 1
        and     eax, 4194303
        mov     rcx, rax
        shl     rcx, 32
        or      rcx, rax
        shl     rax, 16
        or      rax, rcx
        and     rax, r8
        mov     rcx, rax
        shl     rcx, 8
        or      rcx, rax
        and     rcx, rbx
        mov     rdx, rcx
        shl     rdx, 4
        or      rdx, rcx
        and     rdx, rbp
        lea     rax, [4*rdx]
        or      rax, rdx
        and     rax, r12
        # Lane 0 -> rdx
        movd    ecx, xmm6
        and     ecx, 4194303
        mov     rdx, rcx
        shl     rdx, 32
        or      rdx, rcx
        shl     rcx, 16
        or      rcx, rdx
        and     rcx, r8
        mov     rdx, rcx
        shl     rdx, 8
        or      rdx, rcx
        and     rdx, rbx
        # Last use of the borrowed mask: put the abort pointer slot back in rbx.
        mov     rbx, r9
        mov     rcx, rdx
        shl     rcx, 4
        or      rcx, rdx
        and     rcx, rbp
        lea     rdx, [4*rcx]
        or      rdx, rcx
        and     rdx, r12
        # r15 = spread(lane0) + 2*spread(lane1); lane 2 is merged at .LBB0_21.
        lea     r15, [rdx + 2*rax]
        # ---- Output index check: [rsi+68] <= i <= [rsi+72] ----
        movsxd  rax, dword ptr [rsi + 68]
        cmp     r14, rax
        jl      .LBB0_15
        movsxd  rcx, dword ptr [rsi + 72]
        cmp     r14, rcx
        jle     .LBB0_21
.LBB0_15:
        # Index rejected. i >= [rsi+64] reports "out of range of Length"
        # (id.5 / message.6). Otherwise the same message is used only when the
        # lower bound is 0 and the upper bound equals [rsi+64] - 1; any other
        # combination reports the restricted-range error (id.7 / message.8).
        movsxd  rcx, dword ptr [rsi + 64]
        cmp     r14, rcx
        jge     .LBB0_18
        test    eax, eax
        jne     .LBB0_19
        dec     ecx
        cmp     dword ptr [rsi + 72], ecx
        jne     .LBB0_19
        .p2align        4, 0x90
.LBB0_18:
        movabs  rcx, offset .Lburst_abort.error.id.5
        movabs  rdx, offset .Lburst_abort.error.message.6
        jmp     .LBB0_20
        .p2align        4, 0x90
.LBB0_19:
        movabs  rcx, offset .Lburst_abort.error.id.7
        movabs  rdx, offset .Lburst_abort.error.message.8
.LBB0_20:
        call    qword ptr [rbx]
.LBB0_21:
        # Final code word: rdi = r15 + 4*spread(lane2).
        lea     rdi, [r15 + 4*rdi]
        # ---- Write safety check ----
        # Same shape as the read check, using the fields at [rsi+80], [rsi+88],
        # [rsi+92], mask -6 (clears bits 0 and 2), and the CheckWrite callee.
        mov     rax, qword ptr [rsi + 80]
        mov     ecx, dword ptr [rsi + 88]
        mov     edx, dword ptr [rax]
        and     edx, -6
        cmp     ecx, edx
        je      .LBB0_23
        mov     edx, dword ptr [rsi + 92]
        mov     qword ptr [rsp + 32], rax
        mov     dword ptr [rsp + 40], ecx
        mov     dword ptr [rsp + 44], edx
        lea     rcx, [rsp + 32]
        movabs  rax, offset ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected_Ptr"
        call    qword ptr [rax]
.LBB0_23:
        # Store the 64-bit result at output[i]; [rsi+56] is the output base.
        mov     rax, qword ptr [rsi + 56]
        mov     qword ptr [rax + 8*r14], rdi
        # Advance: i += 1, input offset += 12; the count is re-read from memory.
        inc     r14
        movsxd  rax, dword ptr [rsi + 8]
        add     r13, 12
        cmp     r14, rax
        jl      .LBB0_2
.LBB0_24:
        # Epilogue: restore xmm6, release the stack and pop in reverse order.
        movaps  xmm6, xmmword ptr [rsp + 48]
        add     rsp, 72
        pop     rbx
        pop     rbp
        pop     rdi
        pop     rsi
        pop     r12
        pop     r13
        pop     r14
        pop     r15
        ret
.Lfunc_end0:
        .size   "Unity.Jobs.IJobExtensions.JobStruct`1<InfPoints.Octree.Jobs.Morton64EncodeJob>.Execute(ref InfPoints.Octree.Jobs.Morton64EncodeJob data, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_A91CEEBEFAC9E263", .Lfunc_end0-"Unity.Jobs.IJobExtensions.JobStruct`1<InfPoints.Octree.Jobs.Morton64EncodeJob>.Execute(ref InfPoints.Octree.Jobs.Morton64EncodeJob data, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_A91CEEBEFAC9E263"
        .cfi_endproc

        # burst.initialize: fills the three function-pointer slots that the job
        # code calls through. The single argument (in rcx) is itself a function;
        # it is called once per name string and whatever it returns in rax is
        # stored in the matching *_Ptr slot.
        # NOTE(review): presumably a host-supplied symbol resolver - confirm
        # against the Burst runtime.
        .globl  burst.initialize
        .p2align        4, 0x90
        .type   burst.initialize,@function
burst.initialize:
        .cfi_startproc
        # rsi must survive the three calls, so save the caller's value first.
        push    rsi
        .cfi_def_cfa_offset 16
        # 32 bytes of stack reserved for the callees (matches Win64 shadow
        # space - assumption, the listing does not name the ABI).
        sub     rsp, 32
        .cfi_def_cfa_offset 48
        .cfi_offset rsi, -16
        # Keep the resolver function pointer in rsi.
        mov     rsi, rcx
        # 1) "burst_abort" -> .Lburst_abort_Ptr
        movabs  rcx, offset .Lburst_abort.function.string
        call    rsi
        movabs  rcx, offset .Lburst_abort_Ptr
        mov     qword ptr [rcx], rax
        # 2) AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected -> its slot
        movabs  rcx, offset ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected.function.string"
        call    rsi
        movabs  rcx, offset ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected_Ptr"
        mov     qword ptr [rcx], rax
        # 3) AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected -> its slot
        movabs  rcx, offset ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected.function.string"
        call    rsi
        movabs  rcx, offset ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected_Ptr"
        mov     qword ptr [rcx], rax
        # Release the reserved stack and restore rsi.
        add     rsp, 32
        pop     rsi
        ret
.Lfunc_end1:
        .size   burst.initialize, .Lfunc_end1-burst.initialize
        .cfi_endproc

        # ---- Read-only strings passed to burst_abort ----
        # Each abort site in the job loads one "id" (exception type name) into
        # rcx and one "message" into rdx. Every .size is the string length plus
        # the terminating NUL written by .asciz, so the text and the size must
        # be changed together.
        #
        # Unsuffixed id/message: coordinate range check (max lane >= 2^21).
        .type   .Lburst_abort.error.id,@object
        .section        .rodata,"a",@progbits
.Lburst_abort.error.id:
        .asciz  "System.OverflowException"
        .size   .Lburst_abort.error.id, 25

        # NOTE(review): "larger then" is presumably a typo for "larger than" in
        # the managed source; it has to be fixed there, not in this listing.
        .type   .Lburst_abort.error.message,@object
.Lburst_abort.error.message:
        .asciz  "An element of coordinates {0} is larger then the maximum {1}\nThrown from job: InfPoints.Octree.Jobs.Morton64EncodeJob"
        .size   .Lburst_abort.error.message, 118

        # id.1 / message.2: input index outside the array length.
        .type   .Lburst_abort.error.id.1,@object
.Lburst_abort.error.id.1:
        .asciz  "System.IndexOutOfRangeException"
        .size   .Lburst_abort.error.id.1, 32

        .type   .Lburst_abort.error.message.2,@object
.Lburst_abort.error.message.2:
        .asciz  "Index {0} is out of range of '{1}' Length.\nThrown from job: InfPoints.Octree.Jobs.Morton64EncodeJob"
        .size   .Lburst_abort.error.message.2, 100

        # id.3 / message.4: input index outside the restricted parallel-for range.
        .type   .Lburst_abort.error.id.3,@object
.Lburst_abort.error.id.3:
        .asciz  "System.IndexOutOfRangeException"
        .size   .Lburst_abort.error.id.3, 32

        .type   .Lburst_abort.error.message.4,@object
.Lburst_abort.error.message.4:
        .asciz  "Index {0} is out of restricted IJobParallelFor range [{1}...{2}] in ReadWriteBuffer.\n\nThrown from job: InfPoints.Octree.Jobs.Morton64EncodeJob"
        .size   .Lburst_abort.error.message.4, 143

        # id.5 / message.6: output index outside the array length
        # (same text as id.1 / message.2, emitted as a separate object).
        .type   .Lburst_abort.error.id.5,@object
.Lburst_abort.error.id.5:
        .asciz  "System.IndexOutOfRangeException"
        .size   .Lburst_abort.error.id.5, 32

        .type   .Lburst_abort.error.message.6,@object
.Lburst_abort.error.message.6:
        .asciz  "Index {0} is out of range of '{1}' Length.\nThrown from job: InfPoints.Octree.Jobs.Morton64EncodeJob"
        .size   .Lburst_abort.error.message.6, 100

        # id.7 / message.8: output index outside the restricted parallel-for
        # range (same text as id.3 / message.4).
        .type   .Lburst_abort.error.id.7,@object
.Lburst_abort.error.id.7:
        .asciz  "System.IndexOutOfRangeException"
        .size   .Lburst_abort.error.id.7, 32

        .type   .Lburst_abort.error.message.8,@object
.Lburst_abort.error.message.8:
        .asciz  "Index {0} is out of restricted IJobParallelFor range [{1}...{2}] in ReadWriteBuffer.\n\nThrown from job: InfPoints.Octree.Jobs.Morton64EncodeJob"
        .size   .Lburst_abort.error.message.8, 143

        # ---- Function-pointer slots and the names used to resolve them ----
        # Each *_Ptr is an 8-byte, 8-aligned local common symbol written by
        # burst.initialize; the paired *.function.string is the name handed to
        # the resolver.
        .type   .Lburst_abort_Ptr,@object
        .local  .Lburst_abort_Ptr
        .comm   .Lburst_abort_Ptr,8,8
        .type   .Lburst_abort.function.string,@object
.Lburst_abort.function.string:
        .asciz  "burst_abort"
        .size   .Lburst_abort.function.string, 12

        # Slot and name for the read safety check called from the job loop.
        .type   ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected_Ptr",@object
        .local  ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected_Ptr"
        .comm   ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected_Ptr",8,8
        .type   ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected.function.string",@object
".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected.function.string":
        .asciz  "Unity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected"
        .size   ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected.function.string", 91

        # Slot and name for the write safety check called from the job loop.
        .type   ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected_Ptr",@object
        .local  ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected_Ptr"
        .comm   ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected_Ptr",8,8
        .type   ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected.function.string",@object
".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected.function.string":
        .asciz  "Unity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected"
        .size   ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected.function.string", 92


        .section        ".note.GNU-stack","",@progbits