You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
abstract = {In the quest for highest performance in scientific computing, we present a novel framework that relies on high-bandwidth communication between GPUs in a compute cluster. The framework offers linear scaling of performance for explicit algorithms that is only limited by the size of the dataset and the number of GPUs. Slices of the dataset propagate in a ring of processes (GPUs) from one GPU, where they are processed, to the next, which results in a parallel-in-time parallelization. The user of the framework has to write GPU kernels that implement the algorithm and provide slices of the dataset. Knowledge about the underlying parallelization strategy is not required because the communication between processes is carried out by the framework. As a case study, molecular dynamics simulation based on the Lennard-Jones potential is implemented to measure the performance for a homogeneous fluid. Single node performance and strong scaling behavior of this framework is compared to LAMMPS, which is outperformed in the strong scaling case.},
8048
+
author = {Martin Rose and Simon Homes and Lukas Ramsperger and Jose Gracia and Christoph Niethammer and Jadran Vrabec},
8049
+
howpublished = {arXiv:2507.11289v1 [cs.DC]},
8050
+
title = {Cyclic Data Streaming on GPUs for Short Range Stencils Applied to Molecular Dynamics},
8051
+
url = {http://arxiv.org/abs/2507.11289v1},
8052
+
year = {2025},
8053
+
}
8054
+
8034
8055
@article{SperryEtAl2025,
8035
8056
author = {Sperry, Mark Z. and Hwang, John T.},
8036
8057
doi = {10.1007/s11081-025-09967-y},
@@ -8141,3 +8162,12 @@ @unpublished{ZhangEtAl2025
8141
8162
url = {http://arxiv.org/abs/2502.02473v1},
8142
8163
year = {2025},
8143
8164
}
8165
+
8166
+
@unpublished{ZhongEtAl2025,
8167
+
abstract = {This paper mainly studies a direct time-parallel algorithm for solving time-dependent differential equations of order 1 to 3. Different from the traditional time-stepping approach, we directly solve the all-at-once system from higher-order evolution equations by diagonalization the time discretization matrix $B$. Based on the connection between the characteristic equation and Chebyshev polynomials, we give explicit formulas for the eigenvector matrix $V$ of $B$ and its inverse $V^{-1}$ , and prove that $cond_2\left( V \right) =\mathcal{O} \left( n^3 \right)$, where $n$ is the number of time steps. A fast algorithm $B$ designed by exploring the structure of the spectral decomposition of $B$. Numerical experiments were performed to validate the acceleration performance of the fast spectral decomposition algorithm. The results show that the proposed fast algorithm achieves significant computational speedup.},
8168
+
author = {Shun-Zhi Zhong and Yong-Liang Zhao},
8169
+
howpublished = {arXiv:2507.05743v1 [math.NA]},
8170
+
title = {A direct PinT algorithm for higher-order nonlinear equations},
0 commit comments