add convolution mod 2^64

adamant-pwn · adamant-pwn · commit 578666079180 · 2025-05-22T13:47:40.000+02:00
diff --git a/cp-algo/graph/base.hpp b/cp-algo/graph/base.hpp
@@ -44,9 +44,7 @@ namespace cp_algo::graph {
             return std::views::iota(0, n());
         }
         auto edges_view() const {
-            return std::views::iota(0, 2 * m()) | std::views::filter(
-                [](edge_index e) {return !(e % 2);}
-            );
+            return std::views::iota(0, 2 * m()) | std::views::stride(2); // every second index of [0, 2*m()): the even ones, i.e. what the removed filter selected
         }
         auto const& incidence_lists() const {return adj;}
         edge_t const& edge(edge_index e) const {return edges[e];}
diff --git a/cp-algo/math/common.hpp b/cp-algo/math/common.hpp
@@ -29,5 +29,13 @@ namespace cp_algo::math {
     T bpow(T const& x, auto n) {
         return bpow(x, n, T(1));
     }
+    inline constexpr auto inv2(auto x) { // Returns y with y * x == 1 in the wrap-around arithmetic of x's unsigned counterpart type.
+        assert(x % 2); // x must be odd, otherwise no inverse modulo a power of two exists
+        std::make_unsigned_t<decltype(x)> y = 1; // unsigned accumulator so the products below wrap instead of overflowing
+        while(y * x != 1) { // NOTE(review): for types narrower than int the product is promoted and no longer wraps — confirm only >= int-width types are passed
+            y *= 2 - x * y; // Newton/Hensel step: if x*y == 1 + e then x*y*(2 - x*y) == 1 - e*e, so the error squares each round
+        }
+        return y;
+    }
 }
 #endif // CP_ALGO_MATH_COMMON_HPP
diff --git a/cp-algo/math/fft64.hpp b/cp-algo/math/fft64.hpp
@@ -0,0 +1,126 @@
+#ifndef CP_ALGO_MATH_FFT64_HPP
+#define CP_ALGO_MATH_FFT64_HPP
+#include "../random/rng.hpp"
+#include "../math/common.hpp"
+#include "../math/cvector.hpp"
+
+namespace cp_algo::math::fft {
+    struct dft64 { // Transform of a sequence of 64-bit values split into four signed 16-bit limbs, used for convolution modulo 2^64.
+        std::vector<cp_algo::math::fft::cvector> cv; // cv[z] holds limb z (weight 2^(16 z)) of every coefficient
+
+        static uint64_t factor, ifactor; // odd random multiplier and its inverse (inv2), both set once by init()
+        static bool _init; // true once init() has run
+
+        static void init() { // Picks the shared random odd factor on first use; later calls are no-ops.
+            if(_init) return; // already initialised: keep the existing factor
+            _init = true;
+            factor = random::rng();
+            if(factor % 2 == 0) {factor++;} // force the factor to be odd so that inv2 accepts it
+            ifactor = inv2(factor);
+        }
+
+        dft64(auto const& a, size_t n): cv(4, n) { // Builds the four limb transforms of a; cv(4, n) presumably makes four cvector(n) — confirm against cvector's constructors.
+            init();
+            uint64_t cur = 1, step = bpow(factor, n); // cur is multiplied by factor once per iteration; step is presumably factor^n — confirm bpow semantics
+            for(size_t i = 0; i < std::min(std::size(a), n); i++) { // a[n + i] is picked up by the second split call below
+                auto split = [&](size_t i, uint64_t mul) -> std::array<int16_t, 4> { // NOTE: this parameter i shadows the loop index
+                    uint64_t x = i < std::size(a) ? a[i] * mul : 0; // indices past the end of a contribute 0
+                    std::array<int16_t, 4> res;
+                    for(int z = 0; z < 4; z++) {
+                        res[z] = int16_t(x); // low 16 bits, reinterpreted as a signed limb
+                        x = (x >> 16) + (res[z] < 0); // carry 1 into the next limb when this limb came out negative
+                    }
+                    return res;
+                };
+                auto re = split(i, cur); // limbs of a[i] * cur, stored in the real parts
+                auto im = split(n + i, cur * step); // limbs of a[n + i] * cur * step, stored in the imaginary parts
+                for(int z = 0; z < 4; z++) {
+                    real(cv[z].at(i))[i % 4] = re[z]; // NOTE(review): lane i % 4 of the block returned by at(i); assumes blocks are 4 lanes wide (flen == 4?) — confirm
+                    imag(cv[z].at(i))[i % 4] = im[z];
+                }
+                cur *= factor;
+            }
+            checkpoint("dft64 init");
+            for(auto &x: cv) {
+                x.fft(); // transform each limb's vector in place
+            }
+        }
+
+        void dot(dft64 const& t) { // Combines this transform with t's limb by limb, then runs ifft on every limb of *this.
+            size_t N = cv[0].size();
+            cvector::exec_on_evals<1>(N / flen, [&](size_t k, point rt) { // rt is presumably the root of unity for block k — see exec_on_evals in cvector.hpp
+                k *= flen; // turn the block number into an element index
+                auto [A0x, A0y] = cv[0].at(k);
+                auto [A1x, A1y] = cv[1].at(k);
+                auto [A2x, A2y] = cv[2].at(k);
+                auto [A3x, A3y] = cv[3].at(k);
+                std::array B = { // local copies of t's four limb blocks; they are modified in the loop below
+                    t.cv[0].at(k),
+                    t.cv[1].at(k),
+                    t.cv[2].at(k),
+                    t.cv[3].at(k)
+                };
+
+                std::array<vpoint, 4> C = {vz, vz, vz, vz}; // per-limb accumulators; vz is presumably the all-zero vector — confirm
+                for (size_t i = 0; i < flen; i++) {
+                    std::array A = { // lane i of each limb of *this; vz + scalar presumably broadcasts the scalar to all lanes
+                        vpoint{vz + A0x[i], vz + A0y[i]},
+                        vpoint{vz + A1x[i], vz + A1y[i]},
+                        vpoint{vz + A2x[i], vz + A2y[i]},
+                        vpoint{vz + A3x[i], vz + A3y[i]}
+                    };
+                    for(size_t k = 0; k < 4; k++) { // NOTE: this k and the i on the next line shadow the outer k and i within these loops
+                        for(size_t i = 0; i <= k; i++) {
+                            C[k] += A[i] * B[k - i]; // only limb pairs whose indices sum to k < 4 are kept; higher pairs carry weight >= 2^64 and vanish modulo 2^64
+                        }
+                    }
+                    for(size_t k = 0; k < 4; k++) {
+                        real(B[k]) = rotate_right(real(B[k])); // shift the lanes of B by one position for the next i
+                        imag(B[k]) = rotate_right(imag(B[k]));
+                        auto bx = real(B[k])[0], by = imag(B[k])[0];
+                        real(B[k])[0] = bx * real(rt) - by * imag(rt); // multiply lane 0 by rt as a complex number
+                        imag(B[k])[0] = bx * imag(rt) + by * real(rt);
+                    }
+                }
+                cv[0].at(k) = C[0]; // k is the outer element index again here
+                cv[1].at(k) = C[1];
+                cv[2].at(k) = C[2];
+                cv[3].at(k) = C[3];
+            });
+            checkpoint("dot");
+            for(auto &x: cv) {
+                x.ifft(); // back-transform every limb in place
+            }
+        }
+
+        void recover_mod(auto &res, size_t k) { // Writes result coefficients with index < k into res, multiplying by powers of ifactor to undo the constructor's scaling.
+            size_t n = cv[0].size();
+            uint64_t cur = 1, step = bpow(ifactor, n); // cur is multiplied by ifactor once per iteration; step is presumably ifactor^n
+            for(size_t i = 0; i < std::min(k, n); i++) {
+                std::array re = {real(cv[0].get(i)), real(cv[1].get(i)), real(cv[2].get(i)), real(cv[3].get(i))}; // real parts of the four limbs at position i
+                std::array im = {imag(cv[0].get(i)), imag(cv[1].get(i)), imag(cv[2].get(i)), imag(cv[3].get(i))}; // imaginary parts of the four limbs at position i
+                auto set_i = [&](size_t i, auto &x, auto mul) { // NOTE: this parameter i shadows the loop index
+                    if (i >= k) return; // nothing to write beyond the requested length
+                    res[i] = llround(x[0]) + (llround(x[1]) << 16) + (llround(x[2]) << 32) + (llround(x[3]) << 48); // recombine limbs with weights 2^0, 2^16, 2^32, 2^48; NOTE(review): the sum is formed in signed long long (llround's type) — casting each term to uint64_t would avoid relying on signed overflow; confirm
+                    res[i] *= mul;
+                };
+                set_i(i, re, cur); // real parts give res[i]
+                set_i(n + i, im, cur * step); // imaginary parts give res[n + i]
+                cur *= ifactor;
+            }
+            cp_algo::checkpoint("recover mod");
+        }
+    };
+    uint64_t dft64::factor = 1, dft64::ifactor = 1; // placeholders until dft64::init() replaces them; NOTE(review): non-inline definitions in a header — including it from several translation units would define these twice; consider `inline static` members
+    bool dft64::_init = false; // init() has not run yet
+
+    void conv64(auto& a, auto const& b) { // Replaces a with the product of a and b computed through dft64; a ends up with n + m - 1 entries.
+        size_t n = a.size(), m = b.size();
+        size_t N = std::max(flen, std::bit_ceil(n + m - 1) / 2); // half the padded length: dft64 stores index i in the real part and N + i in the imaginary part; NOTE(review): n + m - 1 wraps around when both inputs are empty — confirm callers never pass that
+        dft64 A(a, N), B(b, N);
+        A.dot(B); // A now holds the product's limbs after dot's ifft
+        a.resize(n + m - 1); // NOTE(review): if exactly one input is empty this is still n + m - 1 entries although the true product is empty — confirm intended
+        A.recover_mod(a, n + m - 1);
+    }
+}
+#endif // CP_ALGO_MATH_FFT64_HPP
diff --git a/cp-algo/number_theory/modint.hpp b/cp-algo/number_theory/modint.hpp
@@ -111,15 +111,6 @@ namespace cp_algo::math {
         auto getr() const {return Base::r;}
     };
 
-    inline constexpr auto inv2(auto x) {
-        assert(x % 2);
-        std::make_unsigned_t<decltype(x)> y = 1;
-        while(y * x != 1) {
-            y *= 2 - x * y;
-        }
-        return y;
-    }
-
     template<typename Int = int64_t>
     struct dynamic_modint: modint_base<dynamic_modint<Int>, Int> {
         using Base = modint_base<dynamic_modint<Int>, Int>;
diff --git a/cp-algo/util/simd.hpp b/cp-algo/util/simd.hpp
@@ -7,11 +7,12 @@
 namespace cp_algo {
     template<typename T, size_t len>
     using simd [[gnu::vector_size(len * sizeof(T))]] = T;
+    using u32x8 = simd<uint32_t, 8>;
     using i64x4 = simd<int64_t, 4>;
     using u64x4 = simd<uint64_t, 4>;
-    using u32x8 = simd<uint32_t, 8>;
     using i32x4 = simd<int32_t, 4>;
     using u32x4 = simd<uint32_t, 4>;
+    using i16x4 = simd<int16_t, 4>;
     using dx4 = simd<double, 4>;
 
     [[gnu::always_inline]] inline dx4 abs(dx4 a) {
diff --git a/verify/poly/convolution64.test.cpp b/verify/poly/convolution64.test.cpp
@@ -0,0 +1,31 @@
+// @brief Convolution (Mod $2^{64}$)
+#define PROBLEM "https://judge.yosupo.jp/problem/convolution_mod_2_64"
+#pragma GCC optimize("Ofast,unroll-loops")
+#define CP_ALGO_CHECKPOINT
+#include <bits/stdc++.h>
+#include "cp-algo/math/fft64.hpp"
+#include "blazingio/blazingio.min.hpp"
+
+using namespace std;
+
+void solve() { // Reads two sequences from cin, multiplies them with conv64 and prints the result.
+    int n, m;
+    cin >> n >> m;
+    vector<uint64_t, cp_algo::big_alloc<uint64_t>> a(n), b(m); // a has n entries, b has m; both use the library's big_alloc allocator
+    for(auto &x : a) cin >> x;
+    for(auto &x : b) cin >> x;
+    cp_algo::checkpoint("read");
+    cp_algo::math::fft::conv64(a, b); // a now holds the n + m - 1 product coefficients
+    for(auto x: a) {
+        cout << uint64_t(x) << " "; // space-separated, including a trailing space
+    }
+    cp_algo::checkpoint("write");
+    cp_algo::checkpoint<1>(); // presumably the final checkpoint that reports the collected timings — confirm in the checkpoint helper
+}
+
+signed main() { // Entry point: configures the iostreams, then runs the single test case.
+    //freopen("input.txt", "r", stdin);
+    ios::sync_with_stdio(0); // stop synchronising C++ streams with C stdio
+    cin.tie(0); // do not flush cout before every read from cin
+    solve();
+}

Original file line number	Diff line number	Diff line change
`@@ -44,9 +44,7 @@ namespace cp_algo::graph {`
`44`	`44`	`return std::views::iota(0, n());`
`45`	`45`	`}`
`46`	`46`	`auto edges_view() const {`
`47`		`- return std::views::iota(0, 2 * m()) \| std::views::filter(`
`48`		`- [](edge_index e) {return !(e % 2);}`
`49`		`- );`
	`47`	`+ return std::views::iota(0, 2 * m()) \| std::views::stride(2);`
`50`	`48`	`}`
`51`	`49`	`auto const& incidence_lists() const {return adj;}`
`52`	`50`	`edge_t const& edge(edge_index e) const {return edges[e];}`
Original file line number	Diff line number	Diff line change
`@@ -29,5 +29,13 @@ namespace cp_algo::math {`
`29`	`29`	`T bpow(T const& x, auto n) {`
`30`	`30`	`return bpow(x, n, T(1));`
`31`	`31`	`}`
	`32`	`+ inline constexpr auto inv2(auto x) {`
	`33`	`+ assert(x % 2);`
	`34`	`+ std::make_unsigned_t<decltype(x)> y = 1;`
	`35`	`+ while(y * x != 1) {`
	`36`	`+ y *= 2 - x * y;`
	`37`	`+ }`
	`38`	`+ return y;`
	`39`	`+ }`
`32`	`40`	`}`
`33`	`41`	`#endif // CP_ALGO_MATH_COMMON_HPP`