Just wondering why there isn’t longn or ulongn types in Unity.Mathematics? Should I be using double types and converting?
I created an issue on the github repo regarding long and ulong vector types a few months ago.
https://github.com/Unity-Technologies/Unity.Mathematics/issues/69
Unfortunately I never got an answer
I am really wondering why even double is supported but not 64bit integer numbers. The benefits are obvious and I don’t see any technical reason that would explain why they are not supported yet.
That’s a shame. I’m getting a lot out of the auto-vectorisation of int4, but need a long4 equivalent. I don’t want to lose all those perf gains!
64bit integer numbers are supported as scalars (long/ulong), just like double, but there is no vector version of either double or 64bit integers (double4/long4/ulong4).
Maybe the technical reason is that uint4, int4 and float4 all sum to 32 bits x 4 = 128 bits, so they fit in xmm registers in AVX (or a similarly sized register elsewhere), which lets them ensure wider support. long4 / double4 would require a special case and more work: 256 bit registers on the architectures that have them, and then a special routine to divide and conquer with smaller registers on devices without 256 bit instructions?
There is a double4. Am I misunderstanding something?
Good point. I know very little about this, but it does seem like AVX2 has been around since 2013/2015 for Intel/AMD. But I suppose there would still need to be support for non-AVX2 processors.
I was going by the Burst documentation; it seems like it may be burstable in the future, and you can just use it now.
Ah I see, thanks 5argon.
Still a bit of a mystery why double4 is supported but long4 isn’t, even as a non-vector type.
It may be supported in the future. What kinds of operations are you looking to perform with longN types?
I’d like SIMD support for decoding multiple 64 bit Morton numbers. https://github.com/johnsietsma/InfPoints/blob/master/com.infpoints.octree/Runtime/Morton.cs
Thanks for the example.
I saw your comment in that code sample on DecodeMorton64 which says: /// Burst will not auto-vectorise 64 bit types.
Do you have the assembly for that we can look at?
Ah, I wasn’t very clear on that comment. I meant there is no long4 type, so no packed assembly instructions. The Burst docs don’t list doublen as a vector type, so there is no 64 bit vectorisation.
I’ve added jobs and tests for the 64bit morton encoding to the project.
I’ve previously measured the 32bit perf and looked at its assembly, Morton Order - Burst | John Sietsma. Having a uint4 version of the encoding and decoding functions doubles performance, I was hoping to do the same with long4.
Assembly for the 64bit encode job (Morton64EncodeJob) is below:
# Compiler-generated x86-64 listing (GAS, Intel syntax) for the Execute entry
# point of InfPoints.Octree.Jobs.Morton64EncodeJob.
.text
.intel_syntax noprefix
.file "main"
.globl "Unity.Jobs.IJobExtensions.JobStruct`1<InfPoints.Octree.Jobs.Morton64EncodeJob>.Execute(ref InfPoints.Octree.Jobs.Morton64EncodeJob data, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_A91CEEBEFAC9E263"
.p2align 4, 0x90
.type "Unity.Jobs.IJobExtensions.JobStruct`1<InfPoints.Octree.Jobs.Morton64EncodeJob>.Execute(ref InfPoints.Octree.Jobs.Morton64EncodeJob data, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_A91CEEBEFAC9E263",@function
#-----------------------------------------------------------------------
# Execute
# For every index i in [0, count) where count = dword [data+8]:
#   - reads three 32-bit values (x, y, z) at [data+0] + 12*i,
#   - spreads each value so its bits land on every third bit position,
#   - stores x' + 2*y' + 4*z' as a qword at [data+56] + 8*i.
# Each element is preceded by index-range and safety-handle checks that
# report through function pointers resolved by burst.initialize.
#
# In:    rcx = pointer to the job data (copied to rsi). The other register
#        arguments are overwritten without being read.
# ABI:   register usage is consistent with the Microsoft x64 convention
#        (first argument in rcx; rsi, rdi and xmm6 are preserved) - confirm.
# Roles: rsi = job data, r14 = element index, r13 = byte offset (12 * index),
#        rbx = address of the burst_abort pointer slot,
#        r12 / rbp = spread masks, xmm6 = current (x, y, z) triple,
#        rdi = spread z (kept across the abort call), r15 = x' + 2*y'.
# Stack: 8 pushes + 72 bytes; [rsp+32..47] is a 16-byte scratch struct,
#        [rsp+48..63] holds the saved xmm6. [rsp+0..31] is never written
#        here (presumably the shadow area for callees).
#-----------------------------------------------------------------------
"Unity.Jobs.IJobExtensions.JobStruct`1<InfPoints.Octree.Jobs.Morton64EncodeJob>.Execute(ref InfPoints.Octree.Jobs.Morton64EncodeJob data, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_A91CEEBEFAC9E263":
.cfi_startproc
# Prologue: save the registers this function overwrites and restores on exit.
push r15
.cfi_def_cfa_offset 16
push r14
.cfi_def_cfa_offset 24
push r13
.cfi_def_cfa_offset 32
push r12
.cfi_def_cfa_offset 40
push rsi
.cfi_def_cfa_offset 48
push rdi
.cfi_def_cfa_offset 56
push rbp
.cfi_def_cfa_offset 64
push rbx
.cfi_def_cfa_offset 72
# Entry rsp is 8 mod 16; 64 bytes of pushes plus 72 makes rsp 16-byte aligned,
# so the aligned store of xmm6 at rsp+48 is valid.
sub rsp, 72
movdqa xmmword ptr [rsp + 48], xmm6
.cfi_def_cfa_offset 144
.cfi_offset rbx, -72
.cfi_offset rbp, -64
.cfi_offset rdi, -56
.cfi_offset rsi, -48
.cfi_offset r12, -40
.cfi_offset r13, -32
.cfi_offset r14, -24
.cfi_offset r15, -16
.cfi_offset xmm6, -96
# rsi = job data. Skip the loop entirely when the element count is <= 0.
mov rsi, rcx
mov eax, dword ptr [rsi + 8]
test eax, eax
jle .LBB0_24
# Loop-invariant setup: r13 = byte offset, r14 = index, rbx = abort pointer slot.
# r12 = 0x1249249249249249 (every third bit), rbp = 0x10C30C30C30C30C3.
xor r13d, r13d
movabs rbx, offset .Lburst_abort_Ptr
movabs r12, 1317624576693539401
xor r14d, r14d
movabs rbp, 1207822528635744451
.p2align 4, 0x90
# ---- Loop head: range check for the source buffer ----
# In range when [rsi+12] <= index <= [rsi+16] (signed compares).
.LBB0_2:
movsxd rcx, dword ptr [rsi + 12]
cmp r14, rcx
jl .LBB0_4
movsxd rdx, dword ptr [rsi + 16]
cmp r14, rdx
jle .LBB0_9
# Out of range: pick the message. eax holds the element count here.
# Lower bound == 0 and upper bound == count-1 selects the "Length" message,
# anything else selects the "restricted IJobParallelFor range" message.
.LBB0_4:
test ecx, ecx
jne .LBB0_7
dec eax
cmp dword ptr [rsi + 16], eax
jne .LBB0_7
movabs rcx, offset .Lburst_abort.error.id.1
movabs rdx, offset .Lburst_abort.error.message.2
jmp .LBB0_8
.p2align 4, 0x90
.LBB0_7:
movabs rcx, offset .Lburst_abort.error.id.3
movabs rdx, offset .Lburst_abort.error.message.4
# Report via the burst_abort pointer (rcx = exception id, rdx = message).
# If the call returns, execution simply continues with the element.
.LBB0_8:
call qword ptr [rbx]
# ---- Read safety check for the source buffer ----
# Compares dword [rsi+32] with the dword at pointer [rsi+24] after clearing
# bits 1 and 2 (mask -7). On mismatch, the 16-byte struct
# { qword [rsi+24], dword [rsi+32], dword [rsi+36] } is copied to [rsp+32]
# and its address is passed to CheckReadAndThrowNoEarlyOut.
.LBB0_9:
mov rax, qword ptr [rsi + 24]
mov ecx, dword ptr [rsi + 32]
mov edx, dword ptr [rax]
and edx, -7
cmp ecx, edx
je .LBB0_11
mov edx, dword ptr [rsi + 36]
mov qword ptr [rsp + 32], rax
mov dword ptr [rsp + 40], ecx
mov dword ptr [rsp + 44], edx
lea rcx, [rsp + 32]
movabs rax, offset ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected_Ptr"
call qword ptr [rax]
# ---- Load the triple and check its largest component ----
# xmm6 = (x, y, z, 0) loaded as 8 + 4 bytes from base + r13.
# Shuffle 36 gives (x, y, z, x); shuffle 78 swaps the 64-bit halves; after the
# two unsigned max steps lane 0 holds max(x, y, z).
# Values >= 2097152 (2^21) are reported as System.OverflowException.
.LBB0_11:
mov rax, qword ptr [rsi]
movq xmm6, qword ptr [rax + r13]
pinsrd xmm6, dword ptr [rax + r13 + 8], 2
pshufd xmm0, xmm6, 36
pshufd xmm1, xmm0, 78
pmaxud xmm1, xmm0
pshufd xmm0, xmm1, 229
pmaxud xmm0, xmm1
movd eax, xmm0
cmp eax, 2097152
jb .LBB0_13
movabs rcx, offset .Lburst_abort.error.id
movabs rdx, offset .Lburst_abort.error.message
call qword ptr [rbx]
# ---- Spread z (lane 2) into rdi ----
# The same five-step sequence is repeated below for y and x:
#   v &= 0x3FFFFF                (bit 21 is dropped by the next mask anyway)
#   v  = (v | v<<16 | v<<32) & 0x1F0000FF0000FF    (r8)
#   v  = (v | v<<8)          & 0x100F00F00F00F00F  (rbx, temporarily)
#   v  = (v | v<<4)          & 0x10C30C30C30C30C3  (rbp)
#   v  = (v | v<<2)          & 0x1249249249249249  (r12; lea computes 4*v)
.LBB0_13:
pextrd eax, xmm6, 2
and eax, 4194303
mov rcx, rax
shl rcx, 32
or rcx, rax
shl rax, 16
or rax, rcx
movabs r8, 8725728556220671
and rax, r8
mov rcx, rax
shl rcx, 8
or rcx, rax
# rbx is borrowed for a mask constant; the abort pointer slot address is
# parked in r9 and restored below before any call is made.
mov r9, rbx
movabs rbx, 1157144660301377551
and rcx, rbx
mov rax, rcx
shl rax, 4
or rax, rcx
and rax, rbp
lea rdi, [4*rax]
or rdi, rax
and rdi, r12
# ---- Spread y (lane 1) into rax ----
pextrd eax, xmm6, 1
and eax, 4194303
mov rcx, rax
shl rcx, 32
or rcx, rax
shl rax, 16
or rax, rcx
and rax, r8
mov rcx, rax
shl rcx, 8
or rcx, rax
and rcx, rbx
mov rdx, rcx
shl rdx, 4
or rdx, rcx
and rdx, rbp
lea rax, [4*rdx]
or rax, rdx
and rax, r12
# ---- Spread x (lane 0) into rdx ----
movd ecx, xmm6
and ecx, 4194303
mov rdx, rcx
shl rdx, 32
or rdx, rcx
shl rcx, 16
or rcx, rdx
and rcx, r8
mov rdx, rcx
shl rdx, 8
or rdx, rcx
and rdx, rbx
# Last use of the borrowed mask: put the abort pointer slot address back.
mov rbx, r9
mov rcx, rdx
shl rcx, 4
or rcx, rdx
and rcx, rbp
lea rdx, [4*rcx]
or rdx, rcx
and rdx, r12
# r15 = x' + 2*y'. Both operands only use every third bit, so the shifted
# copies never overlap and the addition behaves like a bitwise OR.
lea r15, [rdx + 2*rax]
# ---- Range check for the destination buffer ----
# In range when [rsi+68] <= index <= [rsi+72] (signed compares).
movsxd rax, dword ptr [rsi + 68]
cmp r14, rax
jl .LBB0_15
movsxd rcx, dword ptr [rsi + 72]
cmp r14, rcx
jle .LBB0_21
# Out of range: index >= [rsi+64] selects the "Length" message. Otherwise a
# non-zero lower bound, or an upper bound other than [rsi+64]-1, selects the
# "restricted IJobParallelFor range" message; failing that, "Length" again.
.LBB0_15:
movsxd rcx, dword ptr [rsi + 64]
cmp r14, rcx
jge .LBB0_18
test eax, eax
jne .LBB0_19
dec ecx
cmp dword ptr [rsi + 72], ecx
jne .LBB0_19
.p2align 4, 0x90
.LBB0_18:
movabs rcx, offset .Lburst_abort.error.id.5
movabs rdx, offset .Lburst_abort.error.message.6
jmp .LBB0_20
.p2align 4, 0x90
.LBB0_19:
movabs rcx, offset .Lburst_abort.error.id.7
movabs rdx, offset .Lburst_abort.error.message.8
.LBB0_20:
call qword ptr [rbx]
# ---- Final combine and write safety check ----
# rdi = x' + 2*y' + 4*z'. rdi and r15 were computed before the call above and
# are relied on to survive it.
# The check mirrors the read check but uses fields at +80/+88/+92 and clears
# bits 0 and 2 (mask -6) before comparing.
.LBB0_21:
lea rdi, [r15 + 4*rdi]
mov rax, qword ptr [rsi + 80]
mov ecx, dword ptr [rsi + 88]
mov edx, dword ptr [rax]
and edx, -6
cmp ecx, edx
je .LBB0_23
mov edx, dword ptr [rsi + 92]
mov qword ptr [rsp + 32], rax
mov dword ptr [rsp + 40], ecx
mov dword ptr [rsp + 44], edx
lea rcx, [rsp + 32]
movabs rax, offset ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected_Ptr"
call qword ptr [rax]
# ---- Store the result and advance ----
# Writes the qword at [rsi+56] + 8*index, then steps the index by 1 and the
# source byte offset by 12. The count is re-read from [rsi+8] every iteration.
.LBB0_23:
mov rax, qword ptr [rsi + 56]
mov qword ptr [rax + 8*r14], rdi
inc r14
movsxd rax, dword ptr [rsi + 8]
add r13, 12
cmp r14, rax
jl .LBB0_2
# Epilogue: restore xmm6 and the saved registers in reverse push order.
.LBB0_24:
movaps xmm6, xmmword ptr [rsp + 48]
add rsp, 72
pop rbx
pop rbp
pop rdi
pop rsi
pop r12
pop r13
pop r14
pop r15
ret
.Lfunc_end0:
.size "Unity.Jobs.IJobExtensions.JobStruct`1<InfPoints.Octree.Jobs.Morton64EncodeJob>.Execute(ref InfPoints.Octree.Jobs.Morton64EncodeJob data, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_A91CEEBEFAC9E263", .Lfunc_end0-"Unity.Jobs.IJobExtensions.JobStruct`1<InfPoints.Octree.Jobs.Morton64EncodeJob>.Execute(ref InfPoints.Octree.Jobs.Morton64EncodeJob data, System.IntPtr additionalPtr, System.IntPtr bufferRangePatchData, ref Unity.Jobs.LowLevel.Unsafe.JobRanges ranges, int jobIndex)_A91CEEBEFAC9E263"
.cfi_endproc
.globl burst.initialize
.p2align 4, 0x90
.type burst.initialize,@function
#-----------------------------------------------------------------------
# burst.initialize
# Fills the three function-pointer slots used by the Execute routine.
# In:    rcx = address of a resolver routine (copied to rsi). It is called
#        three times with rcx = pointer to a NUL-terminated function name,
#        and the value it returns in rax is stored in the matching slot.
# Out:   no value is set up explicitly; rax holds the last resolver result.
# Stack: one push + 32 bytes, which keeps rsp 16-byte aligned at each call.
#        The 32 bytes are never written here (presumably shadow space for
#        the callee under the Microsoft x64 convention - confirm).
#-----------------------------------------------------------------------
burst.initialize:
.cfi_startproc
push rsi
.cfi_def_cfa_offset 16
sub rsp, 32
.cfi_def_cfa_offset 48
.cfi_offset rsi, -16
# Keep the resolver in rsi because rcx is reused for each call's argument.
mov rsi, rcx
# Resolve "burst_abort".
movabs rcx, offset .Lburst_abort.function.string
call rsi
movabs rcx, offset .Lburst_abort_Ptr
mov qword ptr [rcx], rax
# Resolve the read safety-check helper.
movabs rcx, offset ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected.function.string"
call rsi
movabs rcx, offset ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected_Ptr"
mov qword ptr [rcx], rax
# Resolve the write safety-check helper.
movabs rcx, offset ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected.function.string"
call rsi
movabs rcx, offset ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected_Ptr"
mov qword ptr [rcx], rax
add rsp, 32
pop rsi
ret
.Lfunc_end1:
.size burst.initialize, .Lfunc_end1-burst.initialize
.cfi_endproc
# Read-only strings and zero-initialised pointer slots referenced by the code
# above. Every .size value is the string length plus its NUL terminator.
.type .Lburst_abort.error.id,@object
.section .rodata,"a",@progbits
# Exception id and message used when a component fails the 2^21 limit check.
.Lburst_abort.error.id:
.asciz "System.OverflowException"
.size .Lburst_abort.error.id, 25
.type .Lburst_abort.error.message,@object
.Lburst_abort.error.message:
.asciz "An element of coordinates {0} is larger then the maximum {1}\nThrown from job: InfPoints.Octree.Jobs.Morton64EncodeJob"
.size .Lburst_abort.error.message, 118
# Source-buffer range check: ids .1/.3 with messages .2 ("Length") and
# .4 ("restricted IJobParallelFor range").
.type .Lburst_abort.error.id.1,@object
.Lburst_abort.error.id.1:
.asciz "System.IndexOutOfRangeException"
.size .Lburst_abort.error.id.1, 32
.type .Lburst_abort.error.message.2,@object
.Lburst_abort.error.message.2:
.asciz "Index {0} is out of range of '{1}' Length.\nThrown from job: InfPoints.Octree.Jobs.Morton64EncodeJob"
.size .Lburst_abort.error.message.2, 100
.type .Lburst_abort.error.id.3,@object
.Lburst_abort.error.id.3:
.asciz "System.IndexOutOfRangeException"
.size .Lburst_abort.error.id.3, 32
.type .Lburst_abort.error.message.4,@object
.Lburst_abort.error.message.4:
.asciz "Index {0} is out of restricted IJobParallelFor range [{1}...{2}] in ReadWriteBuffer.\n\nThrown from job: InfPoints.Octree.Jobs.Morton64EncodeJob"
.size .Lburst_abort.error.message.4, 143
# Destination-buffer range check: ids .5/.7 with messages .6 and .8, which
# repeat the text of the source-buffer strings.
.type .Lburst_abort.error.id.5,@object
.Lburst_abort.error.id.5:
.asciz "System.IndexOutOfRangeException"
.size .Lburst_abort.error.id.5, 32
.type .Lburst_abort.error.message.6,@object
.Lburst_abort.error.message.6:
.asciz "Index {0} is out of range of '{1}' Length.\nThrown from job: InfPoints.Octree.Jobs.Morton64EncodeJob"
.size .Lburst_abort.error.message.6, 100
.type .Lburst_abort.error.id.7,@object
.Lburst_abort.error.id.7:
.asciz "System.IndexOutOfRangeException"
.size .Lburst_abort.error.id.7, 32
.type .Lburst_abort.error.message.8,@object
.Lburst_abort.error.message.8:
.asciz "Index {0} is out of restricted IJobParallelFor range [{1}...{2}] in ReadWriteBuffer.\n\nThrown from job: InfPoints.Octree.Jobs.Morton64EncodeJob"
.size .Lburst_abort.error.message.8, 143
# Function-pointer slot for burst_abort (8 bytes, 8-byte aligned, file-local)
# and the name handed to the resolver in burst.initialize.
.type .Lburst_abort_Ptr,@object
.local .Lburst_abort_Ptr
.comm .Lburst_abort_Ptr,8,8
.type .Lburst_abort.function.string,@object
.Lburst_abort.function.string:
.asciz "burst_abort"
.size .Lburst_abort.function.string, 12
# Pointer slot and lookup name for the read safety-check helper.
.type ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected_Ptr",@object
.local ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected_Ptr"
.comm ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected_Ptr",8,8
.type ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected.function.string",@object
".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected.function.string":
.asciz "Unity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected"
.size ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckReadAndThrowNoEarlyOut_Injected.function.string", 91
# Pointer slot and lookup name for the write safety-check helper.
.type ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected_Ptr",@object
.local ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected_Ptr"
.comm ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected_Ptr",8,8
.type ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected.function.string",@object
".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected.function.string":
.asciz "Unity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected"
.size ".LUnity.Collections.LowLevel.Unsafe.AtomicSafetyHandle::CheckWriteAndThrowNoEarlyOut_Injected.function.string", 92
# Empty note section; conventionally marks the object as not needing an
# executable stack.
.section ".note.GNU-stack","",@progbits