Background
There is a C# compiler optimization for static byte arrays (see "Use C# compiler's static data support in Encoding.Preamble" by stephentoub · Pull Request #20768 · dotnet/coreclr · GitHub).
Unity has used the Roslyn compiler since Unity 2018, so this static byte array optimization is enabled.
Sample C#
using System;
/// <summary>
/// Sample helper that exposes the same seven-byte literal in two ways:
/// as a <c>ReadOnlySpan&lt;byte&gt;</c> and as a <c>byte[]</c>.
/// </summary>
public static class EmbedderHelper
{
// Span version: the array literal is converted to ReadOnlySpan<byte> on return.
public static ReadOnlySpan<byte> GetTemp() => new byte[] { 161 , 12, 9, 81, 66, 0, 35 };
// Array version: returns the array created from the same literal.
public static byte[] GetTempArray() => new byte[] { 161 , 12, 9, 81, 66, 0, 35 };
}
The IL code below was decompiled with ILSpy.
IL generated from the sample C# code
// EmbedderHelper::GetTemp - builds a ReadOnlySpan<byte> directly over the static data field; no array is allocated.
.method public hidebysig static
valuetype [System.Memory]System.ReadOnlySpan`1<uint8> GetTemp () cil managed
{
// Method begins at RVA 0x2087
// Code size 12 (0xc)
.maxstack 8
// return new ReadOnlySpan<byte>((void*)(&<PrivateImplementationDetails>.D45571C261D8616757BE4C4F20D13FB35A45DF22067243370B5923BF1975F120), 7);
// Push the address of the static field that holds the raw bytes.
IL_0000: ldsflda valuetype '<PrivateImplementationDetails>'/'__StaticArrayInitTypeSize=7' '<PrivateImplementationDetails>'::D45571C261D8616757BE4C4F20D13FB35A45DF22067243370B5923BF1975F120
// Push the length (7).
IL_0005: ldc.i4.7
// Construct the span from (pointer, length).
IL_0006: newobj instance void valuetype [System.Memory]System.ReadOnlySpan`1<uint8>::.ctor(void*, int32)
// (no C# code)
IL_000b: ret
} // end of method EmbedderHelper::GetTemp
// EmbedderHelper::GetTempArray - allocates a new 7-element byte array and fills it from the static data field.
.method public hidebysig static
uint8[] GetTempArray () cil managed
{
// Method begins at RVA 0x2094
// Code size 18 (0x12)
.maxstack 8
// Allocate a new byte[7].
IL_0000: ldc.i4.7
IL_0001: newarr [mscorlib]System.Byte
// Keep one reference for the return value; the other is consumed by InitializeArray.
IL_0006: dup
// Push the handle of the static field that holds the initial bytes.
IL_0007: ldtoken field valuetype '<PrivateImplementationDetails>'/'__StaticArrayInitTypeSize=7' '<PrivateImplementationDetails>'::D45571C261D8616757BE4C4F20D13FB35A45DF22067243370B5923BF1975F120
IL_000c: call void [mscorlib]System.Runtime.CompilerServices.RuntimeHelpers::InitializeArray(class [mscorlib]System.Array, valuetype [mscorlib]System.RuntimeFieldHandle)
IL_0011: ret
} // end of method EmbedderHelper::GetTempArray
// Compiler-generated class that holds the raw bytes of the array literal as a static field.
.class private auto ansi sealed '<PrivateImplementationDetails>'
extends [mscorlib]System.Object
{
.custom instance void [mscorlib]System.Runtime.CompilerServices.CompilerGeneratedAttribute::.ctor() = (
01 00 00 00
)
// Nested Types
// Value type declared with pack 1 and size 7; used as the type of the data field below.
.class nested private explicit ansi sealed '__StaticArrayInitTypeSize=7'
extends [mscorlib]System.ValueType
{
.pack 1
.size 7
} // end of class __StaticArrayInitTypeSize=7
// Fields
// Static init-only field mapped onto the data label I_00002A64.
.field assembly static initonly valuetype '<PrivateImplementationDetails>'/'__StaticArrayInitTypeSize=7' D45571C261D8616757BE4C4F20D13FB35A45DF22067243370B5923BF1975F120 at I_00002A64
// The seven data bytes; in decimal: 161, 12, 9, 81, 66, 0, 35.
.data cil I_00002A64 = bytearray (
a1 0c 09 51 42 00 23
)
} // end of class <PrivateImplementationDetails>
In the .NET Framework environment, the x86-64 JIT-compiled assembly looks like the output shown on SharpLab.
A direct pointer to the static byte array data is embedded! In fact, .NET Core officially supports ReadOnlySpan and optimizes it heavily.
Current Status : IL2CPP C++ Code
IL2CPP C++ code generated from sample C#
// System.ReadOnlySpan`1<System.Byte> EmbedderHelper::GetTemp()
// IL2CPP-generated C++ for GetTemp: returns a ReadOnlySpan<byte> of length 7 whose data pointer
// is obtained at run time via il2cpp_codegen_get_field_data on the <PrivateImplementationDetails> field.
IL2CPP_EXTERN_C IL2CPP_METHOD_ATTR ReadOnlySpan_1_t03DDF1A13DD7F8143C692DB7B68817A086932726 EmbedderHelper_GetTemp_mEE36158801DF177EF1457347491610BB848253BD (const RuntimeMethod* method)
{
// Guard flag: the metadata initialization below runs only while this is still false.
static bool s_Il2CppMethodInitialized;
if (!s_Il2CppMethodInitialized)
{
// Initialize the metadata variables this method uses: the span constructor, the data field's FieldInfo, and the holder class.
il2cpp_codegen_initialize_runtime_metadata((uintptr_t*)&ReadOnlySpan_1__ctor_m52E53BB8862F26B1C23ED6BF8DE68F97F9BEFDDF_RuntimeMethod_var);
il2cpp_codegen_initialize_runtime_metadata((uintptr_t*)&U3CPrivateImplementationDetailsU3E_t6BC7664D9CD46304D39A7D175BB8FFBE0B9F4528____D45571C261D8616757BE4C4F20D13FB35A45DF22067243370B5923BF1975F120_0_FieldInfo_var);
il2cpp_codegen_initialize_runtime_metadata((uintptr_t*)&U3CPrivateImplementationDetailsU3E_t6BC7664D9CD46304D39A7D175BB8FFBE0B9F4528_il2cpp_TypeInfo_var);
s_Il2CppMethodInitialized = true;
}
{
// public static ReadOnlySpan<byte> GetTemp() => new byte[] { 161 , 12, 9, 81, 66, 0, 35 };
ReadOnlySpan_1_t03DDF1A13DD7F8143C692DB7B68817A086932726 L_0;
// Zero the span struct before the constructor fills it in.
memset((&L_0), 0, sizeof(L_0));
// Construct the span from (field data pointer, 7); this corresponds to the IL sequence ldsflda / ldc.i4.7 / newobj.
ReadOnlySpan_1__ctor_m52E53BB8862F26B1C23ED6BF8DE68F97F9BEFDDF_inline((&L_0), (void*)(void*)(il2cpp_codegen_get_field_data(U3CPrivateImplementationDetailsU3E_t6BC7664D9CD46304D39A7D175BB8FFBE0B9F4528____D45571C261D8616757BE4C4F20D13FB35A45DF22067243370B5923BF1975F120_0_FieldInfo_var)), 7, /*hidden argument*/ReadOnlySpan_1__ctor_m52E53BB8862F26B1C23ED6BF8DE68F97F9BEFDDF_RuntimeMethod_var);
return L_0;
}
}
// System.Byte[] EmbedderHelper::GetTempArray()
// IL2CPP-generated C++ for GetTempArray: creates a 7-element byte array with SZArrayNew and
// passes it, together with the data field's handle, to RuntimeHelpers.InitializeArray.
IL2CPP_EXTERN_C IL2CPP_METHOD_ATTR ByteU5BU5D_tDBBEB0E8362242FA7223000D978B0DD19D4B0726* EmbedderHelper_GetTempArray_m8862AF4158A2C302FAA2361E9A3197FAFE9D6C3C (const RuntimeMethod* method)
{
// Guard flag: the metadata initialization below runs only while this is still false.
static bool s_Il2CppMethodInitialized;
if (!s_Il2CppMethodInitialized)
{
// Initialize the metadata variables this method uses: the byte[] type info and the data field's FieldInfo.
il2cpp_codegen_initialize_runtime_metadata((uintptr_t*)&ByteU5BU5D_tDBBEB0E8362242FA7223000D978B0DD19D4B0726_il2cpp_TypeInfo_var);
il2cpp_codegen_initialize_runtime_metadata((uintptr_t*)&U3CPrivateImplementationDetailsU3E_t6BC7664D9CD46304D39A7D175BB8FFBE0B9F4528____D45571C261D8616757BE4C4F20D13FB35A45DF22067243370B5923BF1975F120_0_FieldInfo_var);
s_Il2CppMethodInitialized = true;
}
{
// public static byte[] GetTempArray() => new byte[] { 161 , 12, 9, 81, 66, 0, 35 };
// Corresponds to IL newarr: create the 7-element byte array.
ByteU5BU5D_tDBBEB0E8362242FA7223000D978B0DD19D4B0726* L_0 = (ByteU5BU5D_tDBBEB0E8362242FA7223000D978B0DD19D4B0726*)(ByteU5BU5D_tDBBEB0E8362242FA7223000D978B0DD19D4B0726*)SZArrayNew(ByteU5BU5D_tDBBEB0E8362242FA7223000D978B0DD19D4B0726_il2cpp_TypeInfo_var, (uint32_t)7);
// Corresponds to IL dup: second reference to the same array, used for the call and the return value.
ByteU5BU5D_tDBBEB0E8362242FA7223000D978B0DD19D4B0726* L_1 = L_0;
// Corresponds to IL ldtoken: wrap the FieldInfo pointer in a RuntimeFieldHandle.
RuntimeFieldHandle_t7BE65FC857501059EBAC9772C93B02CD413D9C96 L_2 = { reinterpret_cast<intptr_t> (U3CPrivateImplementationDetailsU3E_t6BC7664D9CD46304D39A7D175BB8FFBE0B9F4528____D45571C261D8616757BE4C4F20D13FB35A45DF22067243370B5923BF1975F120_0_FieldInfo_var) };
// Initialize the array from the field identified by the handle.
RuntimeHelpers_InitializeArray_mE27238308FED781F2D6A719F0903F2E1311B058F((RuntimeArray *)(RuntimeArray *)L_1, L_2, /*hidden argument*/NULL);
return L_1;
}
}
Proposal
Generate additional C++ code that defines a constant unsigned char array.
// Proposed generated constant: the seven raw bytes of the array literal (decimal 161, 12, 9, 81, 66, 0, 35).
const unsigned char D45571C261D8616757BE4C4F20D13FB35A45DF22067243370B5923BF1975F120[7] = { 0xA1, 0x0C, 0x09, 0x51, 0x42, 0x00, 0x23 };
Then replace the ldsflda field access with a direct pointer to that array.
// Old
(void*)(void*)il2cpp_codegen_get_field_data(U3CPrivateImplementationDetailsU3E_t6BC7664D9CD46304D39A7D175BB8FFBE0B9F4528____D45571C261D8616757BE4C4F20D13FB35A45DF22067243370B5923BF1975F120_0_FieldInfo_var)
// New
(void*)const_cast<unsigned char*>(D45571C261D8616757BE4C4F20D13FB35A45DF22067243370B5923BF1975F120)
The current il2cpp_codegen_get_field_data requires a lot of function calls and pointer trampolines.
I studied il2cpp_codegen_get_field_data and found that the data pointer points to read-only memory loaded from global-metadata.dat.
I know that, thanks to global-metadata.dat, a change to string literals can skip C++ re-generation.
Unlike a change to string literals, a change to the static byte array data always causes C++ re-generation.
The Roslyn compiler derives the field name from the array content. See: roslyn/src/Compilers/Core/Portable/CodeGen/PrivateImplementationDetails.cs at cf55f3a58e47298426fa971d3bd9d8857c746c65 · dotnet/roslyn · GitHub
The field name is produced by a hash algorithm.
A change in the field name causes C++ code re-generation.
Since the C++ code is re-generated anyway, this is a good opportunity to embed the static byte array directly in the C++ code.