Update Unifier2.cpp#1742
Update Unifier2.cpp#1742RobertP2705 wants to merge 1 commit intoluau-lang:masterfrom RobertP2705:patch-2
Conversation
Better conditional formatting leads to around 40% less computations.
|
This change sounds performance-related in motivation. Can you provide any data about the improvement you're achieving here? "40% less computations" seems like an arbitrary claim. |
Here's a computation test I ran in cpp. Increments based on each conditional parsed for both the original and my version. We see a standardization of comparisons between 3-4 and a reduction of 32% in total computations. The original has computations that range from 2-8. |
|
This is not a benchmark. It does not measure timing of the operations. I'd expect to see timings from the execution of each function. |
using TypeId = void**;
namespace FFlag {
bool LuauUnifyMetatableWithAny = true;
}
extern TypeId get(TypeId blah);
extern bool doWork(void* subTy, void* superTy);
bool unify(TypeId subTy, TypeId superTy)
{
auto subAny = get(subTy);
auto superAny = get(superTy);
auto subMetatable = get(subTy);
auto superMetatable = get(superTy);
if (subMetatable && superMetatable)
return doWork(subMetatable, superMetatable);
else if (FFlag::LuauUnifyMetatableWithAny && subMetatable && superAny)
return doWork(subMetatable, superAny);
else if (FFlag::LuauUnifyMetatableWithAny && subAny && superMetatable)
return doWork(subAny, superMetatable);
else if (subMetatable) // if we only have one metatable, unify with the inner table
return doWork(*subMetatable, superTy);
else if (superMetatable) // if we only have one metatable, unify with the inner table
return doWork(subTy, *superMetatable);
// The unification failed, but we're not doing type checking.
return true;
}
bool optimizedUnify(TypeId subTy, TypeId superTy)
{
auto subAny = get(subTy);
auto superAny = get(superTy);
auto subMetatable = get(subTy);
auto superMetatable = get(superTy);
const bool flagEnabled = FFlag::LuauUnifyMetatableWithAny;
if (subMetatable) {
if (superMetatable)
return doWork(subMetatable, superMetatable);
else if (flagEnabled && superAny)
return doWork(subMetatable, superAny);
else
return doWork(*subMetatable, superTy); // if we only have one metatable, unify with the inner table
}
else if (superMetatable) {
if (flagEnabled && subAny)
return doWork(subAny, superMetatable);
else
return doWork(subTy, *superMetatable); // if we only have one metatable, unify with the inner table
}
// The unification failed, but we're not doing type checking.
return true;
}Using the code above (which I believe to be a very faithful reproduction of your submitted change in a standalone environment), we can look at the actual generated code from clang 15 with O3. unify(void**, void**):
stp x29, x30, [sp, #-64]!
str x23, [sp, #16]
stp x22, x21, [sp, #32]
stp x20, x19, [sp, #48]
mov x29, sp
mov x20, x1
mov x19, x0
bl get(void**)
mov x21, x0
mov x0, x20
bl get(void**)
mov x23, x0
mov x0, x19
bl get(void**)
mov x22, x0
mov x0, x20
bl get(void**)
mov x1, x0
cbz x22, .LBB0_3
cbz x1, .LBB0_3
mov x0, x22
ldp x20, x19, [sp, #48]
ldp x22, x21, [sp, #32]
ldr x23, [sp, #16]
ldp x29, x30, [sp], #64
b doWork(void*, void*)
.LBB0_3:
adrp x8, FFlag::LuauUnifyMetatableWithAny
ldrb w8, [x8, :lo12:FFlag::LuauUnifyMetatableWithAny]
cbz x23, .LBB0_7
cbz x22, .LBB0_7
cbz w8, .LBB0_7
mov x0, x22
mov x1, x23
ldp x20, x19, [sp, #48]
ldp x22, x21, [sp, #32]
ldr x23, [sp, #16]
ldp x29, x30, [sp], #64
b doWork(void*, void*)
.LBB0_7:
cbz x1, .LBB0_11
cbz x21, .LBB0_11
cbz w8, .LBB0_11
mov x0, x21
ldp x20, x19, [sp, #48]
ldp x22, x21, [sp, #32]
ldr x23, [sp, #16]
ldp x29, x30, [sp], #64
b doWork(void*, void*)
.LBB0_11:
cbz x22, .LBB0_13
ldr x0, [x22]
mov x1, x20
ldp x20, x19, [sp, #48]
ldp x22, x21, [sp, #32]
ldr x23, [sp, #16]
ldp x29, x30, [sp], #64
b doWork(void*, void*)
.LBB0_13:
cbz x1, .LBB0_15
ldr x1, [x1]
mov x0, x19
ldp x20, x19, [sp, #48]
ldp x22, x21, [sp, #32]
ldr x23, [sp, #16]
ldp x29, x30, [sp], #64
b doWork(void*, void*)
.LBB0_15:
mov w0, #1
ldp x20, x19, [sp, #48]
ldp x22, x21, [sp, #32]
ldr x23, [sp, #16]
ldp x29, x30, [sp], #64
ret
optimizedUnify(void**, void**):
stp x29, x30, [sp, #-64]!
str x23, [sp, #16]
stp x22, x21, [sp, #32]
stp x20, x19, [sp, #48]
mov x29, sp
mov x19, x1
mov x20, x0
bl get(void**)
mov x21, x0
mov x0, x19
bl get(void**)
mov x22, x0
mov x0, x20
bl get(void**)
mov x23, x0
mov x0, x19
bl get(void**)
adrp x8, FFlag::LuauUnifyMetatableWithAny
mov x1, x0
ldrb w8, [x8, :lo12:FFlag::LuauUnifyMetatableWithAny]
cbz x23, .LBB1_3
cbz x1, .LBB1_7
mov x0, x23
ldp x20, x19, [sp, #48]
ldp x22, x21, [sp, #32]
ldr x23, [sp, #16]
ldp x29, x30, [sp], #64
b doWork(void*, void*)
.LBB1_3:
cbz x1, .LBB1_10
cbz x21, .LBB1_11
cbz w8, .LBB1_11
mov x0, x21
ldp x20, x19, [sp, #48]
ldp x22, x21, [sp, #32]
ldr x23, [sp, #16]
ldp x29, x30, [sp], #64
b doWork(void*, void*)
.LBB1_7:
cbz x22, .LBB1_12
cbz w8, .LBB1_12
mov x0, x23
mov x1, x22
ldp x20, x19, [sp, #48]
ldp x22, x21, [sp, #32]
ldr x23, [sp, #16]
ldp x29, x30, [sp], #64
b doWork(void*, void*)
.LBB1_10:
mov w0, #1
ldp x20, x19, [sp, #48]
ldp x22, x21, [sp, #32]
ldr x23, [sp, #16]
ldp x29, x30, [sp], #64
ret
.LBB1_11:
ldr x1, [x1]
mov x0, x20
ldp x20, x19, [sp, #48]
ldp x22, x21, [sp, #32]
ldr x23, [sp, #16]
ldp x29, x30, [sp], #64
b doWork(void*, void*)
.LBB1_12:
ldr x0, [x23]
mov x1, x19
ldp x20, x19, [sp, #48]
ldp x22, x21, [sp, #32]
ldr x23, [sp, #16]
ldp x29, x30, [sp], #64
b doWork(void*, void*)You can check it out in a diffing tool here: https://www.diffchecker.com/WAV15Ehc/ |
Better conditional formatting leads to around 40% less computations on conditional statements. Caching flag value reduces redundancy.