diff --git a/CMakeLists.txt b/CMakeLists.txt index 29b152c..53c794c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,3 +7,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() add_executable(main main.cpp) + +find_package(OpenMP REQUIRED) +target_link_libraries(main PUBLIC OpenMP::OpenMP_CXX) +target_compile_options(main PUBLIC -ffast-math -march=native) \ No newline at end of file diff --git a/main.cpp b/main.cpp index cf6369b..655dab5 100644 --- a/main.cpp +++ b/main.cpp @@ -12,11 +12,13 @@ struct Star { float px, py, pz; float vx, vy, vz; float mass; + char padding[12]; }; std::vector stars; void init() { + #pragma unroll for (int i = 0; i < 48; i++) { stars.push_back({ frand(), frand(), frand(), @@ -26,23 +28,32 @@ void init() { } } -float G = 0.001; -float eps = 0.001; -float dt = 0.01; +float G = 0.001f; +float eps = 0.001f; +float dt = 0.01f; void step() { + float eps2 = eps * eps; for (auto &star: stars) { + float tmp_vx = 0, tmp_vy = 0, tmp_vz = 0; + float factor = G * dt; + #pragma omp simd for (auto &other: stars) { float dx = other.px - star.px; float dy = other.py - star.py; float dz = other.pz - star.pz; - float d2 = dx * dx + dy * dy + dz * dz + eps * eps; - d2 *= sqrt(d2); - star.vx += dx * other.mass * G * dt / d2; - star.vy += dy * other.mass * G * dt / d2; - star.vz += dz * other.mass * G * dt / d2; + float d2 = dx * dx + dy * dy + dz * dz + eps2; + d2 *= std::sqrt(d2); + float div = 1.0f / d2; + tmp_vx += dx * other.mass * factor * div; + tmp_vy += dy * other.mass * factor * div; + tmp_vz += dz * other.mass * factor * div; } + star.vx += tmp_vx; + star.vy += tmp_vy; + star.vz += tmp_vz; } + #pragma unroll for (auto &star: stars) { star.px += star.vx * dt; star.py += star.vy * dt; @@ -52,15 +63,17 @@ void step() { float calc() { float energy = 0; + float eps2 = eps * eps; for (auto &star: stars) { float v2 = star.vx * star.vx + star.vy * star.vy + star.vz * star.vz; energy += star.mass * v2 / 2; + #pragma omp simd for (auto &other: stars) { float dx = other.px - star.px; float dy = other.py - star.py; float dz = other.pz - star.pz; - float d2 = dx * dx + dy * dy + dz * dz + eps * eps; - energy -= other.mass * star.mass * G / sqrt(d2) / 2; + float d2 = dx * dx + dy * dy + dz * dz + eps2; + energy -= other.mass * star.mass * G / std::sqrt(d2) / 2; } } return energy; @@ -79,6 +92,7 @@ int main() { init(); printf("Initial energy: %f\n", calc()); auto dt = benchmark([&] { + #pragma unroll for (int i = 0; i < 100000; i++) step(); });