diff --git a/CMakeLists.txt b/CMakeLists.txt index 29b152c..6c85c06 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,3 +7,4 @@ if (NOT CMAKE_BUILD_TYPE) endif() add_executable(main main.cpp) +target_compile_options(main PUBLIC -ffast-math -march=native) diff --git a/main.cpp b/main.cpp index cf6369b..4923931 100644 --- a/main.cpp +++ b/main.cpp @@ -3,26 +3,31 @@ #include #include #include +#include float frand() { return (float)rand() / RAND_MAX * 2 - 1; } -struct Star { - float px, py, pz; - float vx, vy, vz; - float mass; +const int N = 48; + +struct StarField { + std::array px, py, pz; + std::array vx, vy, vz; + std::array mass; }; -std::vector stars; +StarField stars; void init() { - for (int i = 0; i < 48; i++) { - stars.push_back({ - frand(), frand(), frand(), - frand(), frand(), frand(), - frand() + 1, - }); + for (int i = 0; i < N; i++) { + stars.px.fill(frand()); + stars.py.fill(frand()); + stars.pz.fill(frand()); + stars.vx.fill(frand()); + stars.vy.fill(frand()); + stars.vz.fill(frand()); + stars.mass.fill(frand() + 1); } } @@ -31,36 +36,55 @@ float eps = 0.001; float dt = 0.01; void step() { - for (auto &star: stars) { - for (auto &other: stars) { - float dx = other.px - star.px; - float dy = other.py - star.py; - float dz = other.pz - star.pz; - float d2 = dx * dx + dy * dy + dz * dz + eps * eps; - d2 *= sqrt(d2); - star.vx += dx * other.mass * G * dt / d2; - star.vy += dy * other.mass * G * dt / d2; - star.vz += dz * other.mass * G * dt / d2; + float Gdt = G * dt; + float eps_square = eps * eps; + + // 修改为外层是变化的 + for (size_t i = 0; i < N; i++) { + float px = stars.px[i], py = stars.py[i], pz = stars.pz[i]; + float vx = stars.vx[i], vy = stars.vy[i], vz = stars.vz[i]; + + #pragma GCC unroll 2 + for (size_t j = 0; j < N; j++) { + float dx = stars.px[j] - px; + float dy = stars.py[j] - py; + float dz = stars.pz[j] - pz; + float d2 = dx * dx + dy * dy + dz * dz + eps_square; + d2 *= std::sqrt(d2); + vx += dx * stars.mass[j] * Gdt / d2; + vy += dy * stars.mass[j] * Gdt / d2; + vz += dz * stars.mass[j] * Gdt / d2; } + + stars.vx[i] = vx; + stars.vy[i] = vy; + stars.vz[i] = vz; } - for (auto &star: stars) { - star.px += star.vx * dt; - star.py += star.vy * dt; - star.pz += star.vz * dt; + + #pragma GCC unroll 3 + for (size_t i = 0; i < N; i++) { + stars.px[i] += stars.vx[i] * dt; + stars.py[i] += stars.vy[i] * dt; + stars.pz[i] += stars.vz[i] * dt; } } float calc() { float energy = 0; - for (auto &star: stars) { - float v2 = star.vx * star.vx + star.vy * star.vy + star.vz * star.vz; - energy += star.mass * v2 / 2; - for (auto &other: stars) { - float dx = other.px - star.px; - float dy = other.py - star.py; - float dz = other.pz - star.pz; + + for (int i = 0; i < N; i++) { + float v2 = stars.vx[i] * stars.vx[i] + stars.vy[i] * stars.vy[i] + stars.vz[i] * stars.vz[i]; + energy += stars.mass[i] * v2 * 0.5; + float px = stars.px[i]; + float py = stars.py[i]; + float pz = stars.pz[i]; + float mass_G_half = stars.mass[i] * G * 0.5; + for (int j = 0; j < N; j++) { + float dx = stars.px[j] - px; + float dy = stars.py[j] - py; + float dz = stars.pz[j] - pz; float d2 = dx * dx + dy * dy + dz * dz + eps * eps; - energy -= other.mass * star.mass * G / sqrt(d2) / 2; + energy -= stars.mass[j] * mass_G_half / std::sqrt(d2); } } return energy;