cecco/helpers.hpp at main · christiansenger/cecco · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
/**
 * @file helpers.hpp
 * @brief Utility functions and mathematical helpers
 * @author Christian Senger <senger@inue.uni-stuttgart.de>
 * @version 2.1
 * @date 2026
 *
 * @copyright
 * Copyright (c) 2026, Christian Senger <senger@inue.uni-stuttgart.de>
 *
 * Licensed for noncommercial use only, including academic teaching, research, and personal non-profit purposes.
 * Commercial use is prohibited without a separate commercial license. See the [LICENSE](../../LICENSE) file in the
 * repository root for full terms and how to request a commercial license.
 *
 * @section Description
 *
 * This header file provides essential utility functions and mathematical helpers. It supports:
 *
 * - **Thread-safe random number generation**: Configurable deterministic/hardware seeding with
 *   per-thread isolation using the singleton RNG class
 * - **Mathematical functions**: Extended Euclidean algorithm, modular inverse, factorial,
 *   binomial coefficients, and primality testing
 * - **Square-and-multiply** for exponentiation
 * - **Double-and-add** for multiplication
 * - **High-performance caching**: Template-based Cache class with compile-time type safety
 *   and O(1) access via std::variant and std::array
 * - **Utility functions**: Find maxima in vectors, constexpr floor function, divisibility testing
 */

#ifndef HELPERS_HPP
#define HELPERS_HPP

#include <array>
#include <atomic>
#include <cmath>
#include <cstdint>
#include <functional>
#include <map>
#include <random>
#include <thread>
#include <utility>
#include <variant>
#include <vector>

#include "InfInt.hpp"

namespace CECCO {

/**
 * @brief Thread-safe random number generator with configurable seeding
 *
 * Singleton class providing thread-local std::mt19937 generators with centralized
 * seed management. Supports both deterministic seeding (for testing/reproducibility)
 * and hardware-based seeding (for cryptographic applications).
 *
 * @section Thread_Model
 * - Each thread gets its own std::mt19937 generator (thread_local storage)
 * - Atomic seed management ensures consistent seeding across all threads
 * - Lock-free design using std::atomic operations and thread-local storage
 *
 * @section Seeding_Behavior
 * - **Deterministic mode**: All threads use seed XOR thread_id for reproducibility
 * - **Hardware mode**: Each thread uses independent hardware entropy via std::random_device
 * - **Reseeding**: Threads automatically reseed when global seed changes
 *
 * @note This class cannot be instantiated directly (all members are static, no constructors are generated). Use the
 * static interface.
 */
class RNG {
   public:
    /**
     * @brief Get thread-local random number generator
     * @return Reference to thread-local std::mt19937 generator
     *
     * Returns a reference to this thread's std::mt19937 generator. The generator
     * is initialized on first access and automatically reseeded when the global
     * seed configuration changes.
     */
    static std::mt19937& get() {
        static thread_local std::mt19937 generator{get_initial_seed()};
        if (should_reseed()) generator.seed(get_current_seed());
        return generator;
    }

    /**
     * @brief Set deterministic seed for all threads
     * @param seed Base seed value (combined with thread ID)
     *
     * Configures all thread generators to use a deterministic seed based on
     * the provided value XOR each thread's ID. This ensures reproducible
     * results across runs while maintaining thread isolation.
     */
    static void seed(uint32_t seed) {
        use_deterministic_seed.store(true);
        deterministic_seed_value.store(seed);
        reseed_generation.fetch_add(1);  // Signal all threads to reseed
    }

    /**
     * @brief Enable hardware-based random seeding
     *
     * Configures all thread generators to use independent hardware entropy
     * via std::random_device. Each thread gets its own entropy source.
     */
    static void use_hardware_seed() {
        use_deterministic_seed.store(false);
        reseed_generation.fetch_add(1);  // Signal all threads to reseed
    }

   private:
    static inline std::atomic<bool> use_deterministic_seed{false};
    static inline std::atomic<uint32_t> deterministic_seed_value{0};
    static inline std::atomic<uint32_t> reseed_generation{0};

    static uint32_t get_initial_seed() {
        if (use_deterministic_seed.load()) {
            auto tid = std::hash<std::thread::id>{}(std::this_thread::get_id());
            return deterministic_seed_value.load() ^ static_cast<uint32_t>(tid);
        } else {
            static thread_local std::random_device rd;
            return rd();
        }
    }

    static uint32_t get_current_seed() { return get_initial_seed(); }

    static bool should_reseed() {
        static thread_local uint32_t last_seen_generation = 0;
        uint32_t current_generation = reseed_generation.load();
        if (current_generation != last_seen_generation) {
            last_seen_generation = current_generation;
            return true;
        }
        return false;
    }
};

/**
 * @brief Thread-safe convenience function for random number generation
 * @return Reference to thread-local random number generator
 *
 * Returns a reference to the current thread's std::mt19937 generator. Provides a simple interface while maintaining
 * full thread safety.
 *
 * @note Equivalent to RNG::get() but with a shorter name for convenience
 */
inline std::mt19937& gen() { return RNG::get(); }

/**
 * @brief Find all indices of maximum elements in a vector
 * @tparam T Element type (must support operator< for comparison)
 * @param v Input vector to search
 * @return Vector of indices where maximum elements occur (empty if input is empty)
 *
 * Returns a vector containing the indices of all elements that have the maximum value.
 * If multiple elements share the maximum value, all their indices are returned.
 *
 * @note Time complexity: O(n) with two passes over the data
 * @note Space complexity: O(k) where k is the number of maximum elements
 *
 * @code{.cpp}
 * std::vector<int> data = {1, 3, 2, 3, 1};
 * auto indices = find_maxima(data);  // Returns {1, 3} (indices of value 3)
 * @endcode
 */
template <class T>
std::vector<size_t> find_maxima(const std::vector<T>& v) {
    std::vector<size_t> indices;
    if (v.empty()) return indices;
    const auto max_value = *std::max_element(v.begin(), v.end());
    for (size_t i = 0; i < v.size(); ++i) {
        if (v[i] == max_value) indices.push_back(i);
    }
    return indices;
}

/**
 * @brief Primality test using trial division
 * @tparam T Unsigned integer type
 * @param a Number to test for primality
 * @return true if a is prime, false otherwise
 *
 * Tests primality using optimized trial division. Only tests odd divisors
 * up to √a for efficiency. Returns false for a ≤ 1 and even numbers > 2.
 *
 * @code{.cpp}
 * bool p1 = is_prime(17);  // Returns true
 * bool p2 = is_prime(15);  // Returns false (3 * 5 == 15)
 * @endcode
 */
template <class T>
constexpr bool is_prime(T a) noexcept {
    if (a == 2) return true;
    if (a <= 1 || !(a & 1)) return false;
    // find "smaller half" of factorization (if factor > sqrt(a) there must be a factor < sqrt(a))
    for (T b = 3; b * b <= a; b += 2)
        if ((a % b) == 0) return false;
    return true;
}

/**
 * @brief Extended Euclidean algorithm for computing GCD and Bézout coefficients
 * @tparam T Signed integral type
 * @param a First integer
 * @param b Second integer
 * @param s Pointer to store Bézout coefficient for a (optional)
 * @param t Pointer to store Bézout coefficient for b (optional)
 * @return Greatest common divisor of a and b
 *
 * Computes gcd(a,b) using the Euclidean algorithm. If s and t are provided,
 * also computes Bézout coefficients such that: a*s + b*t = gcd(a,b)
 *
 * @code{.cpp}
 * auto gcd = GCD(48, 18);        // Returns 6
 * int s, t;
 * auto gcd = GCD(48, 18, &s, &t); // Returns 6, sets s=1, t=-2 (48*1 + 18*(-2) = 6)
 * @endcode
 */
template <class T>
constexpr T GCD(T a, T b, T* s = nullptr, T* t = nullptr) noexcept {
    static_assert((std::is_integral<T>::value && std::is_signed<T>::value) || std::is_same_v<T, InfInt>,
                  "GCD requires signed integral type or InfInt");
    if (s != nullptr && t != nullptr) {  // extended EA
        *s = T(1);
        *t = T(0);
        T u = T(0);
        T v = T(1);
        while (b != T(0)) {
            const T q = a / b;
            T b1 = std::move(b);
            b = a - q * b1;
            a = std::move(b1);
            T u1 = std::move(u);
            u = *s - q * u1;
            *s = std::move(u1);
            T v1 = std::move(v);
            v = *t - q * v1;
            *t = std::move(v1);
        }
    } else {  // "normal" EA
        while (b != T(0)) {
            const T q = a / b;
            T b1 = std::move(b);
            b = a - q * b1;
            a = std::move(b1);
        }
    }
    return a;
}

/**
 * @brief Modular multiplicative inverse using extended Euclidean algorithm
 * @tparam p Prime modulus (must be prime for correct results)
 * @tparam T Signed integer type supporting arithmetic operations
 * @param a Element to invert
 * @return Modular inverse a^(-1) mod p
 *
 * Computes the multiplicative inverse of a modulo p by leveraging the extended
 * Euclidean algorithm. Returns x such that (a * x) ≡ 1 (mod p).
 *
 * @code{.cpp}
 * auto inv = modinv<7>(3);   // Returns 5 (since 3*5 ≡ 1 mod 7)
 * @endcode
 */
template <uint16_t p, class T>
constexpr T modinv(T a) noexcept {
    static_assert(is_prime(p), "p is not a prime");
    T s, t;
    GCD<T>(std::move(a), T(p), &s, &t);  // don't actually need the gcd
    T result = s % T(p);
    return result < 0 ? result + T(p) : result;
}

/**
 * @brief Factorial function a!
 * @tparam T Integer type supporting arithmetic operations
 * @param n Non-negative integer
 * @return Factorial a! = a * (a-1) * ... * 2 * 1
 *
 * Computes the factorial of a using iterative multiplication.
 * Returns 1 for a = 0 and a = 1 (by mathematical convention).
 *
 * @code{.cpp}
 * auto fact5 = fac<int>(5);  // Returns 120
 * auto fact0 = fac<int>(0);  // Returns 1
 * auto big_fact = fac<InfInt>(100);  // Arbitrary precision factorial
 * @endcode
 */
template <class T>
T fac(T a) noexcept {
    T res = 1;
    while (a > 1) {
        res *= a;
        --a;
    }
    return res;
}

/**
 * @brief Binomial coefficient C(n,k) = n choose k
 * @tparam T Integer type supporting arithmetic operations
 * @param n Total number of items
 * @param k Number of items to choose
 * @return Binomial coefficient C(n,k) = n! / (k! * (n-k)!)
 *
 * Computes binomial coefficients using the multiplicative formula to avoid
 * computing large factorials. Uses symmetry C(n, k) = C(n, n-k) for efficiency.
 *
 * @note InfInt specialization available for arbitrary precision
 *
 * @code{.cpp}
 * auto coeff = bin(10, 3);              // Returns 120
 * auto large_coeff = bin<InfInt>(100, 50); // Arbitrary precision
 * @endcode
 */
template <class T>
T bin(const T& n, T k) noexcept {
    if (k > n) return 0;
    if (k == 0 || n == k) return 1;
    if (k > n - k) k = n - k;  // symmetry
    T res = 1;
    for (T i = 1; i <= k; ++i) res = res * (n - k + i) / i;
    return res;
}

/**
 * @brief Binomial coefficient specialization for arbitrary precision integers
 * @param n Total number of items
 * @param k Number of items to choose
 * @return Binomial coefficient C(n,k) using arbitrary precision arithmetic
 *
 * Specialization of bin() for InfInt that can handle arbitrarily large values without overflow. Uses
 * numerator/denominator approach for improved performance (does not perform expensive divisions in each iteration).
 *
 * @note InfInt operations are significantly slower than native integer types
 * @note Suitable for combinatorics problems requiring exact large integer results
 */
template <>
InfInt bin(const InfInt& n, InfInt k) noexcept {
    if (k > n) return 0;
    if (k == 0 || n == k) return 1;
    if (n == 0) return 0;
    if (k > n - k) k = n - k;  // symmetry
    InfInt numerator = 1;
    InfInt denominator = 1;
    for (InfInt i = 1; i <= k; ++i) {
        numerator *= n + 1 - i;
        denominator *= i;
    }
    return numerator / denominator;
}

/**
 * @brief Fast exponentiation using square-and-multiply algorithm
 * @tparam T Numeric type supporting multiplication
 * @param b Base value
 * @param e Exponent (can be negative for types supporting division)
 * @return b^e computed efficiently
 *
 * Computes b^e using the binary exponentiation algorithm with O(log e) complexity.
 * For negative exponents, computes (1/b)^|e| if T supports division.
 *
 * @note For negative exponents, T must support division (1/b)
 * @note Returns T(1) for e = 0 regardless of base value
 *
 * @code{.cpp}
 * auto result = sqm(2, 10);            // Returns 1024 (2^10)
 * auto big_pow = sqm<InfInt>(3, 100);  // Large exponentiation
 * auto inv_pow = sqm(2.0, -3);         // Returns 0.125 (2^-3)
 * @endcode
 */
template <class T>
constexpr T sqm(T b, int e) {
    static_assert(std::is_integral<decltype(e)>::value, "exponent must be integral type");
    if (e == 0) return T(1);
    if (e < 0) {
        b = T(1) / b;
        if (e == std::numeric_limits<int>::min())
            throw std::invalid_argument(
                "Exponent e too large!");  // INT_MIN might be INT_MAX+1, potential problem in next line
        e = -e;
    }
    // square and multiply
    T temp(1);
    unsigned int exp = static_cast<unsigned int>(e);
    while (exp > 0) {
        if (exp & 1) temp *= b;
        b *= b;
        exp >>= 1;
    }
    return temp;
}

/**
 * @brief Fast scalar multiplication using double-and-add algorithm
 * @tparam T Numeric type supporting addition
 * @param b Base value (multiplicand)
 * @param m Multiplier (can be negative)
 * @return b * m computed efficiently using repeated doubling
 *
 * Computes b * m using the binary multiplication algorithm with O(log |m|) complexity.
 * For negative multipliers, computes (-b) * |m|.
 *
 * @note Returns T(0) for m = 0 regardless of base value
 *
 * @code{.cpp}
 * auto result = daa(7, 3);                  // Returns 21 (7*3)
 * auto large_mult = daa<InfInt>(123, 456);  // Large multiplication
 * auto neg_mult = daa(5, -4);               // Returns -20 (5*(-4))
 * @endcode
 */
template <class T>
constexpr T daa(T b, int m) noexcept {
    static_assert(std::is_integral<decltype(m)>::value, "multiplicand must be integral type");
    if (m == 0) return T(0);
    if (m < 0) {
        b = -b;
        if (m == std::numeric_limits<int>::min())
            throw std::invalid_argument(
                "Multiplier m too large!");  // INT_MIN might be INT_MAX+1, potential problem in next line
        m = -m;
    }
    // double and add
    T temp(0);
    unsigned int um = static_cast<unsigned int>(m);
    while (um > 0) {
        if (um & 1) temp += b;
        b += b;
        um >>= 1;
    }
    return temp;
}

namespace details {

/**
 * @brief Constexpr-compatible floor function
 * @param x Floating-point value
 * @return Floor of x (largest integer ≤ x)
 *
 * Constexpr-compatible alternative to std::floor for compile-time evaluation. Handles both positive and negative
 * numbers correctly.
 *
 * @note Returns double (not integer) to match std::floor behavior
 *
 * @code{.cpp}
 * constexpr auto f1 = floor_constexpr(3.7);   // Returns 3.0
 * constexpr auto f2 = floor_constexpr(-2.3);  // Returns -3.0
 * @endcode
 */
constexpr double floor_constexpr(double x) {
    long int i = static_cast<long int>(x);
    return (x < 0 && x != i) ? i - 1 : i;
}

/**
 * @brief Cache entry type specification for compile-time type-safe caching
 * @tparam ID Unique compile-time identifier for this cache entry
 * @tparam T Value type to be cached for this ID
 *
 * Template structure used to associate compile-time IDs with their corresponding
 * types in the Cache template. Each entry maps an ID to a specific type.
 *
 * @note Used in conjunction with Cache<ENTRIES...> for type-safe heterogeneous caching
 * @note ID must be unique within a given Cache instance
 *
 * @code{.cpp}
 * using VectorEntry = CacheEntry<0, std::vector<int>>;
 * using DoubleEntry = CacheEntry<1, double>;
 * Cache<VectorEntry, DoubleEntry> cache;
 * @endcode
 */
template <auto ID, typename T>
struct CacheEntry {
    static constexpr auto id = ID;
    using type = T;
};

/**
 * @brief High-performance heterogeneous cache with compile-time type safety
 * @tparam ENTRIES Pack of CacheEntry types specifying ID-to-type mappings
 *
 * Template-based cache providing O(1) access to heterogeneous values using
 * compile-time IDs. Uses std::variant and std::array for efficient storage
 * with full type safety verified at compile time.
 *
 * @section Design_Features
 * - **Compile-time type safety**: Each ID maps to exactly one type
 * - **O(1) access**: Direct array indexing based on compile-time IDs
 * - **Heterogeneous storage**: Different types can be cached together
 * - **Memory efficient**: Fixed-size array based on maximum ID
 * - **Lazy evaluation**: Values computed only when first requested
 *
 * @warning **Not thread-safe** for concurrent writes. Use external synchronization
 * if multiple threads may call set(), invalidate(), or operator() simultaneously.
 *
 * @code{.cpp}
 * using Entry1 = CacheEntry<0, std::vector<int>>;
 * using Entry2 = CacheEntry<1, double>;
 * using Entry3 = CacheEntry<5, std::string>;  // IDs need not be consecutive
 * Cache<Entry1, Entry2, Entry3> cache;
 *
 * // Set values
 * cache.set<0>(std::vector<int>{1, 2, 3});
 * cache.set<1>(3.14159);
 *
 * // Get or compute values
 * auto& vec = cache.get_or_compute<0>([]() { return std::vector<int>{4, 5, 6}; });
 * auto& pi = cache.get_or_compute<1>([]() { return compute_pi(); });
 *
 * // Check and invalidate
 * if (cache.is_set<0>()) cache.invalidate<0>();
 * @endcode
 */
template <typename... ENTRIES>
class Cache {
   private:
    // Calculate maximum ID at compile time
    static constexpr auto max_id = std::max({ENTRIES::id...});

    // Create variant type from all entry types
    using VariantType = std::variant<std::monostate, typename ENTRIES::type...>;

    // Fixed-size array for O(1) access
    mutable std::array<VariantType, max_id + 1> cache_data{};

    // Compile-time ID -> Type lookup using simple recursion
    template <auto ID, typename First, typename... Rest>
    struct type_finder_impl {
        using type =
            std::conditional_t<First::id == ID, typename First::type, typename type_finder_impl<ID, Rest...>::type>;
    };

    template <auto ID, typename Last>
    struct type_finder_impl<ID, Last> {
        static_assert(Last::id == ID,
                      "Cache ID not found in CacheEntry list - check that all cache IDs are properly defined");
        using type = typename Last::type;
    };

    template <auto ID>
    using type_for_id_t = typename type_finder_impl<ID, ENTRIES...>::type;

   public:
    // Default constructor - initializes all cache slots to empty
    Cache() { cache_data.fill(std::monostate{}); }

    // Check if specific ID is cached
    template <auto ID>
    bool is_set() const {
        static_assert(ID <= max_id, "Cache ID out of bounds");
        return !std::holds_alternative<std::monostate>(cache_data[ID]);
    }

    // Set value for specific ID with automatic type conversion
    template <auto ID, typename TYPE>
    auto set(TYPE&& value) const {
        static_assert(ID <= max_id, "Cache ID out of bounds");
        using ExpectedType = type_for_id_t<ID>;

        auto old_it = cache_data.begin() + ID;
        cache_data[ID] = static_cast<ExpectedType>(std::forward<TYPE>(value));
        return std::make_pair(old_it, true);
    }

    // Invalidate specific ID
    template <auto ID>
    bool invalidate() const noexcept {
        static_assert(ID <= max_id, "Cache ID out of bounds");
        bool was_set = is_set<ID>();
        cache_data[ID] = std::monostate{};
        return was_set;
    }

    // Invalidate all entries
    bool invalidate() const noexcept {
        bool had_any = std::any_of(cache_data.begin(), cache_data.end(),
                                   [](const auto& entry) { return !std::holds_alternative<std::monostate>(entry); });
        cache_data.fill(std::monostate{});
        return had_any;
    }

    // Get or compute with lambda
    template <auto ID>
    const auto& operator()(auto&& calculate_func) const {
        static_assert(ID <= max_id, "Cache ID out of bounds");
        using ReturnType = type_for_id_t<ID>;

        auto& cached = cache_data[ID];
        if (std::holds_alternative<std::monostate>(cached)) {
            cached = calculate_func();
        }
        return std::get<ReturnType>(cached);
    }

    // Get cached value (if present)
    template <auto ID>
    std::optional<type_for_id_t<ID>> get() const {
        static_assert(ID <= max_id, "Cache ID out of bounds");
        if (std::holds_alternative<std::monostate>(cache_data[ID])) return std::nullopt;
        return std::get<type_for_id_t<ID>>(cache_data[ID]);
    }

    // Alternative syntax for cleaner usage
    template <auto ID>
    const auto& get_or_compute(auto&& calculate_func) const {
        return operator()<ID>(std::forward<decltype(calculate_func)>(calculate_func));
    }
};

std::string basename(const char* path) {
    std::string s(path);

    const auto pos = s.find_last_of("/\\");
    if (pos != std::string::npos) s.erase(0, pos + 1);

    const auto dot = s.find_last_of('.');
    if (dot != std::string::npos && dot != 0) s.erase(dot);

    return s;
}

}  // namespace details

}  // namespace CECCO

#endif