Skip to content

Update Unifier2.cpp#1742

Closed
RobertP2705 wants to merge 1 commit intoluau-lang:masterfrom
RobertP2705:patch-2
Closed

Update Unifier2.cpp#1742
RobertP2705 wants to merge 1 commit intoluau-lang:masterfrom
RobertP2705:patch-2

Conversation

@RobertP2705
Copy link

Better conditional formatting leads to around 40% less computations on conditional statements. Caching flag value reduces redundancy.

Better conditional formatting leads to around 40% less computations.
@aatxe
Copy link
Member

aatxe commented Mar 19, 2025

This change sounds performance-related in motivation. Can you provide any data about the improvement you're achieving here? "40% less computations" seems like an arbitrary claim.

@RobertP2705
Copy link
Author

RobertP2705 commented Mar 19, 2025

#include <string>
#include <vector>
#include <iomanip> 

namespace FFlag {
    bool LuauUnifyMetatableWithAny = true;
}

int conditionCount = 0;

void resetCounter() {
    conditionCount = 0;
}

bool condition(bool value) {
    conditionCount++;
    return value;
}

void originalUnify(
    bool subMetatable, 
    bool superMetatable,
    bool subAny, 
    bool superAny
) {
    resetCounter();
    
    if (condition(subMetatable) && condition(superMetatable))
        return;
    else if (condition(FFlag::LuauUnifyMetatableWithAny) && condition(subMetatable) && condition(superAny))
        return;
    else if (condition(FFlag::LuauUnifyMetatableWithAny) && condition(subAny) && condition(superMetatable))
        return;
    else if (condition(subMetatable)) // if we only have one metatable, unify with the inner table
        return;
    else if (condition(superMetatable)) // if we only have one metatable, unify with the inner table
        return;
    
    return;
}

void optimizedUnify(
    bool subMetatable, 
    bool superMetatable,
    bool subAny, 
    bool superAny
) {
    resetCounter();
    
    // Cache the flag check since it's used multiple times
    const bool flagEnabled = condition(FFlag::LuauUnifyMetatableWithAny);
    
    if (condition(subMetatable)) {
        if (condition(superMetatable))
            return;
        else if (flagEnabled && condition(superAny))
            return;
        else
            return;
    } 
    else if (condition(superMetatable)) {
        if (flagEnabled && condition(subAny))
            return;
        else
            return;
    }
    
    return;
}

struct TestCase {
    std::string name;
    bool subMetatable;
    bool superMetatable;
    bool subAny;
    bool superAny;
    bool flagEnabled;
    
    TestCase(
        const std::string& n, 
        bool subM, 
        bool superM, 
        bool subA, 
        bool superA, 
        bool flag = true
    ) : name(n), 
        subMetatable(subM), 
        superMetatable(superM), 
        subAny(subA), 
        superAny(superA), 
        flagEnabled(flag) {}
};

int main() {
    std::vector<TestCase> testCases = {
        TestCase("Test 1", true, true, false, false),
        TestCase("Test 2", true, false, false, true),
        TestCase("Test 3", false, true, true, false),
        TestCase("Test 4", true, false, false, false),
        TestCase("Test 5", false, true, false, false),
        TestCase("Test 6", false, false, true, true),
        TestCase("Test 7", true, true, true, true),
        TestCase("Test 8", true, true, false, false, false),
        TestCase("Test 9", true, false, false, true, false),
        TestCase("Test 10", false, true, true, false, false)
    };
    
    struct TestResult {
        std::string name;
        int originalChecks;
        int optimizedChecks;
        double reduction;
    };
    
    std::vector<TestResult> results;
    
    int totalOriginalChecks = 0;
    int totalOptimizedChecks = 0;
    
    for (const auto& test : testCases) {
        FFlag::LuauUnifyMetatableWithAny = test.flagEnabled;
        
        originalUnify(
            test.subMetatable, test.superMetatable, 
            test.subAny, test.superAny
        );
        int originalChecks = conditionCount;
        totalOriginalChecks += originalChecks;
        
        optimizedUnify(
            test.subMetatable, test.superMetatable, 
            test.subAny, test.superAny
        );
        int optimizedChecks = conditionCount;
        totalOptimizedChecks += optimizedChecks;
        
        double reduction = 100.0 * (originalChecks - optimizedChecks) / originalChecks;
        
        results.push_back({
            test.name,
            originalChecks,
            optimizedChecks,
            reduction
        });
    }
    
    std::cout << "=== Condition Count Test Results ===" << std::endl;
    std::cout << "+-----------------------+-----------------+-----------------+------------+" << std::endl;
    std::cout << "| Test Case             | Original Checks | Optimized Checks | Reduction  |" << std::endl;
    std::cout << "+-----------------------+-----------------+-----------------+------------+" << std::endl;
    
    for (const auto& result : results) {
        std::cout << "| " << std::left << std::setw(21) << result.name << " | ";
        std::cout << std::right << std::setw(15) << result.originalChecks << " | ";
        std::cout << std::right << std::setw(15) << result.optimizedChecks << " | ";
        std::cout << std::right << std::setw(8) << std::fixed << std::setprecision(2) << result.reduction << "% |\n ";
    }
    
    
    double totalReduction = 100.0 * (totalOriginalChecks - totalOptimizedChecks) / totalOriginalChecks;
    std::cout << "\nTotals across all test cases:" << std::endl;
    std::cout << "- Original checks: " << totalOriginalChecks << std::endl;
    std::cout << "- Optimized checks: " << totalOptimizedChecks << std::endl;
    std::cout << "- Overall reduction: " << std::fixed << std::setprecision(2) << totalReduction << "%" << std::endl;
    
    return 0;
}

Here's a computation test I ran in cpp. Increments based on each conditional parsed for both the original and my version. We see a standardization of comparisons between 3-4 and a reduction of 32% in total computations. The original has computations that range from 2-8.

@aatxe
Copy link
Member

aatxe commented Mar 19, 2025

This is not a benchmark. It does not measure timing of the operations. I'd expect to see timings from the execution of each function.

@aatxe
Copy link
Member

aatxe commented Mar 19, 2025

using TypeId = void**;

namespace FFlag {
    bool LuauUnifyMetatableWithAny = true;
}

extern TypeId get(TypeId blah);
extern bool doWork(void* subTy, void* superTy);

bool unify(TypeId subTy, TypeId superTy)
{
    auto subAny = get(subTy);
    auto superAny = get(superTy);

    auto subMetatable = get(subTy);
    auto superMetatable = get(superTy);

    if (subMetatable && superMetatable)
        return doWork(subMetatable, superMetatable);
    else if (FFlag::LuauUnifyMetatableWithAny && subMetatable && superAny)
        return doWork(subMetatable, superAny);
    else if (FFlag::LuauUnifyMetatableWithAny && subAny && superMetatable)
        return doWork(subAny, superMetatable);
    else if (subMetatable) // if we only have one metatable, unify with the inner table
        return doWork(*subMetatable, superTy);
    else if (superMetatable) // if we only have one metatable, unify with the inner table
        return doWork(subTy, *superMetatable);

    // The unification failed, but we're not doing type checking.
    return true;
}

bool optimizedUnify(TypeId subTy, TypeId superTy)
{
    auto subAny = get(subTy);
    auto superAny = get(superTy);

    auto subMetatable = get(subTy);
    auto superMetatable = get(superTy);

    const bool flagEnabled = FFlag::LuauUnifyMetatableWithAny;

    if (subMetatable) {
        if (superMetatable)
            return doWork(subMetatable, superMetatable);
        else if (flagEnabled && superAny)
            return doWork(subMetatable, superAny);
        else
            return doWork(*subMetatable, superTy); // if we only have one metatable, unify with the inner table
    } 
    else if (superMetatable) {
        if (flagEnabled && subAny)
            return doWork(subAny, superMetatable);
        else
            return doWork(subTy, *superMetatable); // if we only have one metatable, unify with the inner table
    }

    // The unification failed, but we're not doing type checking.
    return true;
}

Using the code above (which I believe to be a very faithful reproduction of your submitted change in a standalone environment), we can look at the actual generated code from clang 15 with O3.

unify(void**, void**):
        stp     x29, x30, [sp, #-64]!
        str     x23, [sp, #16]
        stp     x22, x21, [sp, #32]
        stp     x20, x19, [sp, #48]
        mov     x29, sp
        mov     x20, x1
        mov     x19, x0
        bl      get(void**)
        mov     x21, x0
        mov     x0, x20
        bl      get(void**)
        mov     x23, x0
        mov     x0, x19
        bl      get(void**)
        mov     x22, x0
        mov     x0, x20
        bl      get(void**)
        mov     x1, x0
        cbz     x22, .LBB0_3
        cbz     x1, .LBB0_3
        mov     x0, x22
        ldp     x20, x19, [sp, #48]
        ldp     x22, x21, [sp, #32]
        ldr     x23, [sp, #16]
        ldp     x29, x30, [sp], #64
        b       doWork(void*, void*)
.LBB0_3:
        adrp    x8, FFlag::LuauUnifyMetatableWithAny
        ldrb    w8, [x8, :lo12:FFlag::LuauUnifyMetatableWithAny]
        cbz     x23, .LBB0_7
        cbz     x22, .LBB0_7
        cbz     w8, .LBB0_7
        mov     x0, x22
        mov     x1, x23
        ldp     x20, x19, [sp, #48]
        ldp     x22, x21, [sp, #32]
        ldr     x23, [sp, #16]
        ldp     x29, x30, [sp], #64
        b       doWork(void*, void*)
.LBB0_7:
        cbz     x1, .LBB0_11
        cbz     x21, .LBB0_11
        cbz     w8, .LBB0_11
        mov     x0, x21
        ldp     x20, x19, [sp, #48]
        ldp     x22, x21, [sp, #32]
        ldr     x23, [sp, #16]
        ldp     x29, x30, [sp], #64
        b       doWork(void*, void*)
.LBB0_11:
        cbz     x22, .LBB0_13
        ldr     x0, [x22]
        mov     x1, x20
        ldp     x20, x19, [sp, #48]
        ldp     x22, x21, [sp, #32]
        ldr     x23, [sp, #16]
        ldp     x29, x30, [sp], #64
        b       doWork(void*, void*)
.LBB0_13:
        cbz     x1, .LBB0_15
        ldr     x1, [x1]
        mov     x0, x19
        ldp     x20, x19, [sp, #48]
        ldp     x22, x21, [sp, #32]
        ldr     x23, [sp, #16]
        ldp     x29, x30, [sp], #64
        b       doWork(void*, void*)
.LBB0_15:
        mov     w0, #1
        ldp     x20, x19, [sp, #48]
        ldp     x22, x21, [sp, #32]
        ldr     x23, [sp, #16]
        ldp     x29, x30, [sp], #64
        ret

optimizedUnify(void**, void**):
        stp     x29, x30, [sp, #-64]!
        str     x23, [sp, #16]
        stp     x22, x21, [sp, #32]
        stp     x20, x19, [sp, #48]
        mov     x29, sp
        mov     x19, x1
        mov     x20, x0
        bl      get(void**)
        mov     x21, x0
        mov     x0, x19
        bl      get(void**)
        mov     x22, x0
        mov     x0, x20
        bl      get(void**)
        mov     x23, x0
        mov     x0, x19
        bl      get(void**)
        adrp    x8, FFlag::LuauUnifyMetatableWithAny
        mov     x1, x0
        ldrb    w8, [x8, :lo12:FFlag::LuauUnifyMetatableWithAny]
        cbz     x23, .LBB1_3
        cbz     x1, .LBB1_7
        mov     x0, x23
        ldp     x20, x19, [sp, #48]
        ldp     x22, x21, [sp, #32]
        ldr     x23, [sp, #16]
        ldp     x29, x30, [sp], #64
        b       doWork(void*, void*)
.LBB1_3:
        cbz     x1, .LBB1_10
        cbz     x21, .LBB1_11
        cbz     w8, .LBB1_11
        mov     x0, x21
        ldp     x20, x19, [sp, #48]
        ldp     x22, x21, [sp, #32]
        ldr     x23, [sp, #16]
        ldp     x29, x30, [sp], #64
        b       doWork(void*, void*)
.LBB1_7:
        cbz     x22, .LBB1_12
        cbz     w8, .LBB1_12
        mov     x0, x23
        mov     x1, x22
        ldp     x20, x19, [sp, #48]
        ldp     x22, x21, [sp, #32]
        ldr     x23, [sp, #16]
        ldp     x29, x30, [sp], #64
        b       doWork(void*, void*)
.LBB1_10:
        mov     w0, #1
        ldp     x20, x19, [sp, #48]
        ldp     x22, x21, [sp, #32]
        ldr     x23, [sp, #16]
        ldp     x29, x30, [sp], #64
        ret
.LBB1_11:
        ldr     x1, [x1]
        mov     x0, x20
        ldp     x20, x19, [sp, #48]
        ldp     x22, x21, [sp, #32]
        ldr     x23, [sp, #16]
        ldp     x29, x30, [sp], #64
        b       doWork(void*, void*)
.LBB1_12:
        ldr     x0, [x23]
        mov     x1, x19
        ldp     x20, x19, [sp, #48]
        ldp     x22, x21, [sp, #32]
        ldr     x23, [sp, #16]
        ldp     x29, x30, [sp], #64
        b       doWork(void*, void*)

You can check it out in a diffing tool here: https://www.diffchecker.com/WAV15Ehc/
and see that while the generated code has changed order a little bit, the instructions executed are just about identical between the two in an optimized build. So, without a benchmark showing otherwise, I see no reason for us to believe that this produces any performance improvement at all.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants