diff --git a/include/picongpu/fields/Fields.def b/include/picongpu/fields/Fields.def
index 2744b6aac7..777c6e94d8 100644
--- a/include/picongpu/fields/Fields.def
+++ b/include/picongpu/fields/Fields.def
@@ -68,4 +68,8 @@ namespace picongpu
/** Current Density j, @see FieldJ.hpp */
class FieldJ;
+ namespace fields::poissonSolver
+ {
+ struct FieldV;
+ } // namespace fields::poissonSolver
} // namespace picongpu
diff --git a/include/picongpu/fields/Fields.hpp b/include/picongpu/fields/Fields.hpp
index 8ab7dd9305..dee16e1fe7 100644
--- a/include/picongpu/fields/Fields.hpp
+++ b/include/picongpu/fields/Fields.hpp
@@ -25,3 +25,4 @@
#include "picongpu/fields/FieldJ.hpp"
#include "picongpu/fields/FieldTmp.hpp"
#include "picongpu/fields/Fields.def"
+#include "picongpu/fields/poissonSolver/FieldV.hpp"
diff --git a/include/picongpu/fields/poissonSolver/BICGStab.hpp b/include/picongpu/fields/poissonSolver/BICGStab.hpp
new file mode 100644
index 0000000000..95dcac28fa
--- /dev/null
+++ b/include/picongpu/fields/poissonSolver/BICGStab.hpp
@@ -0,0 +1,35 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+
+struct BICGStab
+{
+ // return residual
+ // return number of iterations
+    void operator()(FieldTmp& fieldV, FieldTmp& fieldRho, MappingDesc* cellDescription)
+    {
+        // set boundary conditions on fieldV (Dirichlet or Neumann)
+
+ // normalize the problem based on norm(fieldRho)
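+
+        /* A sketch of the preconditioned BiCGStab recurrences this stub is meant
+         * to implement (x: potential, b: normalized rho, A: -laplace,
+         * M: preconditioner), matching the inline implementation in
+         * picongpu/simulation/stage/Poisson.x.cpp:
+         *   r0 = b - A x0,  p0 = r0
+         *   alpha   = (r0, r_k) / (r0, A M p_k)
+         *   s       = r_k - alpha A M p_k
+         *   omega   = (A M s, s) / (A M s, A M s)
+         *   x_{k+1} = x_k + alpha M p_k + omega M s
+         *   r_{k+1} = s - omega A M s
+         *   beta    = (r0, r_{k+1}) / (r0, r_k) * alpha / omega
+         *   p_{k+1} = r_{k+1} + beta (p_k - omega A M p_k)
+         */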
+ }
+};
diff --git a/include/picongpu/fields/poissonSolver/BoundaryConditions.hpp b/include/picongpu/fields/poissonSolver/BoundaryConditions.hpp
new file mode 100644
index 0000000000..663b58538a
--- /dev/null
+++ b/include/picongpu/fields/poissonSolver/BoundaryConditions.hpp
@@ -0,0 +1,118 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+#include "picongpu/fields/poissonSolver/FieldV.hpp"
+
+#include
+#include
+#include
+
+namespace picongpu::fields::poissonSolver
+{
+ struct SolutionFunction
+ {
+        HDINLINE auto operator()(math::Vector<float_64, simDim> const& totalCellCoordinate) const
+ {
+ if constexpr(simDim == 3u)
+ {
+ return math::sin(totalCellCoordinate.x()) + math::cos(totalCellCoordinate.y())
+ + 3.0 * math::sin(totalCellCoordinate.z())
+ + totalCellCoordinate.x() * totalCellCoordinate.productOfComponents() + 10.0;
+ }
+ else if constexpr(simDim == 2u)
+ {
+ return math::sin(totalCellCoordinate.x()) + math::cos(totalCellCoordinate.y())
+ + totalCellCoordinate.x() * totalCellCoordinate.productOfComponents() + 10.0;
+ }
+ }
+ };
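+
+    // Note: with the sign convention of StencilFunc (A = -laplace), the matching
+    // right-hand side for this manufactured solution would be
+    //   3D: rho = sin(x) + cos(y) + 3 sin(z) - 2 y z
+    //   2D: rho = sin(x) + cos(y) - 2 y
+    // which allows verifying the solver against a known potential.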
+
+ struct ApplyDirichletBCsFromFunctionKernel
+ {
+        DINLINE auto operator()(
+            auto const& worker,
+            auto fieldVBox,
+            auto const boundaryFunction,
+            DataSpace<simDim> cellOffsetToTotalOrigin,
+            auto const mapper) const -> void
+        {
+            // including guards
+            DataSpace<simDim> const superCellIdx(mapper.getSuperCellIndex(worker.blockDomIdxND()));
+
+            DataSpace<simDim> numGuardCells = mapper.getGuardingSuperCells() * SuperCellSize::toRT();
+
+            // no guards included
+            DataSpace<simDim> superCellTotalCellOffset
+                = cellOffsetToTotalOrigin + superCellIdx * SuperCellSize::toRT() - numGuardCells;
+
+            constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;
+
+            auto forEachCellInSupercell = lockstep::makeForEach<cellsPerSuperCell>(worker);
+
+            forEachCellInSupercell(
+                [&](int32_t const linearCellIdx)
+                {
+                    /* cell index within the superCell */
+                    DataSpace<simDim> const cellIdx
+                        = pmacc::math::mapToND(SuperCellSize::toRT(), linearCellIdx);
+                    // without guards
+                    DataSpace<simDim> const totalCellIdx = superCellTotalCellOffset + cellIdx;
+
+                    auto totalDistance = precisionCast<float_64>(totalCellIdx)
+                        * precisionCast<float_64>(sim.pic.getCellSize().shrink<simDim>());
+
+                    fieldVBox(superCellIdx * SuperCellSize::toRT() + cellIdx)
+                        = boundaryFunction(totalDistance);
+                });
+        }
+ };
+
+ struct BoundaryConditionsDirichlet
+ {
+        //! Write Dirichlet boundary values from SolutionFunction onto all true domain boundaries
+ void operator()(FieldV& fieldV, MappingDesc cellDescription) const
+ {
+            SubGrid<simDim> const& subGrid = Environment<simDim>::get().SubGrid();
+ auto globalDomain = subGrid.getGlobalDomain();
+ auto localDomain = subGrid.getLocalDomain();
+
+ auto cellOffsetToTotalOrigin = globalDomain.offset + localDomain.offset;
+
+
+            for(uint32_t i = 1; i < NumberOfExchanges<simDim>::value; ++i)
+            {
+                /* only call for planes: left right top bottom back front*/
+                if(FRONT % i == 0 && !(Environment<simDim>::get().GridController().getCommunicationMask().isSet(i)))
+                {
+                    ExchangeMapping<GUARD, MappingDesc> mapper(cellDescription, i);
+
+ PMACC_LOCKSTEP_KERNEL(ApplyDirichletBCsFromFunctionKernel{})
+ .config(mapper.getGridDim(), SuperCellSize{})(
+ fieldV.fieldVBuffer->getDeviceBuffer().getDataBox(),
+ SolutionFunction{},
+ cellOffsetToTotalOrigin,
+ mapper);
+ }
+ }
+ }
+ };
+} // namespace picongpu::fields::poissonSolver
diff --git a/include/picongpu/fields/poissonSolver/ChargeDeposition.hpp b/include/picongpu/fields/poissonSolver/ChargeDeposition.hpp
new file mode 100644
index 0000000000..e6b0bda3e4
--- /dev/null
+++ b/include/picongpu/fields/poissonSolver/ChargeDeposition.hpp
@@ -0,0 +1,20 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
diff --git a/include/picongpu/fields/poissonSolver/FieldV.hpp b/include/picongpu/fields/poissonSolver/FieldV.hpp
new file mode 100644
index 0000000000..73e93a6d32
--- /dev/null
+++ b/include/picongpu/fields/poissonSolver/FieldV.hpp
@@ -0,0 +1,121 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/YeeCell.hpp"
+#include "picongpu/traits/FieldPosition.hpp"
+#include "picongpu/traits/SIBaseUnits.hpp"
+
+#include
+
+namespace picongpu::fields::poissonSolver
+{
+ struct FieldV : pmacc::ISimulationData
+ {
+ using ValueType = float_64;
+
+        std::shared_ptr<GridBuffer<ValueType, simDim>> fieldVBuffer;
+
+ //! Unit type of field components
+ using UnitValueType = float1_64;
+
+        using DataBoxType = pmacc::DataBox<pmacc::PitchedBox<ValueType, simDim>>;
+
+ //! Number of components of ValueType, for serialization
+ static constexpr int numComponents = 1u;
+
+ FieldV(picongpu::MappingDesc const mappingDesc)
+            : fieldVBuffer{std::make_shared<GridBuffer<ValueType, simDim>>(mappingDesc.getGridLayout())}
+ {
+ }
+
+ void synchronize() override
+ {
+ fieldVBuffer->getHostBuffer().copyFrom(fieldVBuffer->getDeviceBuffer());
+ };
+
+ //! Get the host data box for the field values
+ DataBoxType getHostDataBox()
+ {
+ return fieldVBuffer->getHostBuffer().getDataBox();
+ }
+
+ //! Get the device data box for the field values
+ DataBoxType getDeviceDataBox()
+ {
+ return fieldVBuffer->getDeviceBuffer().getDataBox();
+ }
+
+        GridLayout<simDim> getGridLayout()
+ {
+ return fieldVBuffer->getGridLayout();
+ }
+
+ /**
+ * Return the globally unique identifier for this simulation data.
+ *
+ * @return globally unique identifier
+ */
+ SimulationDataId getUniqueId() override
+ {
+ return "FieldV";
+ };
+
+ static std::string getName()
+ {
+ return "FieldV";
+ }
+
+ static UnitValueType getUnit()
+ {
+ return UnitValueType{sim.unit.eField() * sim.unit.length()};
+ }
+
+        static std::vector<float_64> getUnitDimension()
+        {
+            /* V is in volts: V = kg * m^2 / (A * s^3)
+             * -> L^2 * M * T^-3 * I^-1
+             */
+            std::vector<float_64> unitDimension(7, 0.0);
+ unitDimension.at(SIBaseUnits::length) = 2.0;
+ unitDimension.at(SIBaseUnits::mass) = 1.0;
+ unitDimension.at(SIBaseUnits::time) = -3.0;
+ unitDimension.at(SIBaseUnits::electricCurrent) = -1.0;
+ return unitDimension;
+ }
+ };
+} // namespace picongpu::fields::poissonSolver
+
+namespace picongpu::traits
+{
+    template<>
+    struct FieldPosition<fields::YeeCell, fields::poissonSolver::FieldV>
+    {
+        using VectorVectorDD = ::pmacc::math::Vector<floatD_X, 1u> const;
+
+ HDINLINE FieldPosition() = default;
+
+ HDINLINE VectorVectorDD operator()() const
+ {
+ return VectorVectorDD::create(floatD_X::create(0.0));
+ }
+ };
+} // namespace picongpu::traits
diff --git a/include/picongpu/fields/poissonSolver/RightHandSideNormalization.hpp b/include/picongpu/fields/poissonSolver/RightHandSideNormalization.hpp
new file mode 100644
index 0000000000..a5fe7d6840
--- /dev/null
+++ b/include/picongpu/fields/poissonSolver/RightHandSideNormalization.hpp
@@ -0,0 +1,95 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+#include "picongpu/fields/poissonSolver/FieldV.hpp"
+
+#include
+#include
+#include
+#include
+
+namespace picongpu::fields::poissonSolver
+{
+ struct RightHandSideNormalizationKernel
+ {
+ DINLINE auto operator()(auto const& worker, auto const fieldVBox, auto fieldRhoBox, auto const mapper) const
+ -> void
+ {
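+            /* Eliminating a known Dirichlet boundary value V_b from the second-order
+             * Laplacian row of its interior neighbour moves the term V_b / dx_d^2 to
+             * the right-hand side; that contribution is added to fieldRho here.
+             */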
+            DataSpace<simDim> const relExchangeDir
+                = Mask::getRelativeDirections<simDim>(mapper.getExchangeType());
+            DataSpace<simDim> const superCellIdx(mapper.getSuperCellIndex(worker.blockDomIdxND()));
+            DataSpace<simDim> superCellCellOffset = superCellIdx * SuperCellSize::toRT();
+
+            DataSpace<simDim> numGuardCells = mapper.getGuardingSuperCells() * SuperCellSize::toRT();
+            DataSpace<simDim> adjustedSuperCellCellOffset = superCellCellOffset - (relExchangeDir * numGuardCells);
+
+            DataSpace<simDim> numCellsLocalDomain = mapper.getGridSuperCells() * SuperCellSize::toRT();
+
+            constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;
+
+            auto forEachCellInSupercell = lockstep::makeForEach<cellsPerSuperCell>(worker);
+
+            forEachCellInSupercell(
+                [&](int32_t const linearCellIdx)
+                {
+                    /* cell index within the superCell */
+                    DataSpace<simDim> const cellIdx = pmacc::math::mapToND(SuperCellSize::toRT(), linearCellIdx);
+
+                    DataSpace<simDim> const localCellIdx = superCellCellOffset + cellIdx;
+                    DataSpace<simDim> const dataCellIdx = adjustedSuperCellCellOffset + cellIdx;
+
+ for(uint32_t d = 0; d < simDim; ++d)
+ {
+ if(relExchangeDir[d] != 0
+ && (localCellIdx[d] == 0u || localCellIdx[d] == numCellsLocalDomain[d] - 1))
+ {
+ fieldRhoBox(dataCellIdx) += fieldVBox(dataCellIdx + relExchangeDir)
+ / (sim.pic.getCellSize()[d] * sim.pic.getCellSize()[d]);
+ }
+ }
+ });
+ }
+ };
+
+ struct RightHandSideNormalization
+ {
+        //! Fold the known Dirichlet boundary values of fieldV into the right-hand side fieldRho
+ void operator()(FieldV& fieldV, FieldTmp& fieldRho, MappingDesc cellDescription)
+ {
+ /* only call for planes: left right top bottom back front*/
+            for(uint32_t i = 1; i < NumberOfExchanges<simDim>::value; ++i)
+            {
+                if(FRONT % i == 0 && !(Environment<simDim>::get().GridController().getCommunicationMask().isSet(i)))
+                {
+                    ExchangeMapping<GUARD, MappingDesc> mapper(cellDescription, i);
+
+ PMACC_LOCKSTEP_KERNEL(RightHandSideNormalizationKernel{})
+ .config(mapper.getGridDim(), SuperCellSize{})(
+ fieldV.fieldVBuffer->getDeviceBuffer().getDataBox(),
+ fieldRho.getDeviceDataBox(),
+ mapper);
+ }
+ }
+ }
+ };
+} // namespace picongpu::fields::poissonSolver
diff --git a/include/picongpu/fields/poissonSolver/Stencil.hpp b/include/picongpu/fields/poissonSolver/Stencil.hpp
new file mode 100644
index 0000000000..4b98714241
--- /dev/null
+++ b/include/picongpu/fields/poissonSolver/Stencil.hpp
@@ -0,0 +1,147 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+#include "picongpu/fields/poissonSolver/FieldV.hpp"
+
+#include
+#include
+#include
+
+namespace picongpu::fields::poissonSolver
+{
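+    /** Matrix-free application of the discrete negative Laplacian
+     *
+     *   (A v)_i = sum_d (2 v_i - v_{i-e_d} - v_{i+e_d}) / dx_d^2
+     *
+     * i.e. the standard second-order central-difference stencil for -laplace(v).
+     */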
+ struct StencilFunc
+ {
+ HDINLINE auto operator()(auto fieldIn) const
+ {
+            constexpr auto cellSize = sim.pic.getCellSize().shrink<simDim>();
+ constexpr auto cellSizeSquared = cellSize * cellSize;
+ float_64 const fc0 = 2.0 * (1.0 / (cellSizeSquared)).sumOfComponents();
+ if constexpr(simDim == 3u)
+ {
+ constexpr DataSpace<3u> xDir(1, 0, 0);
+ constexpr DataSpace<3u> yDir(0, 1, 0);
+ constexpr DataSpace<3u> zDir(0, 0, 1);
+
+
+ return *fieldIn * fc0 - (fieldIn(-xDir) + fieldIn(xDir)) / cellSizeSquared.x()
+ - (fieldIn(-yDir) + fieldIn(yDir)) / cellSizeSquared.y()
+ - (fieldIn(-zDir) + fieldIn(zDir)) / cellSizeSquared.z();
+ }
+ else if constexpr(simDim == 2u)
+ {
+ constexpr DataSpace<2u> xDir(1, 0);
+ constexpr DataSpace<2u> yDir(0, 1);
+
+ return *fieldIn * fc0 - (fieldIn(-xDir) + fieldIn(xDir)) / cellSizeSquared.x()
+ - (fieldIn(-yDir) + fieldIn(yDir)) / cellSizeSquared.y();
+ }
+ }
+ };
+
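+    /** Derive E = -grad(V) with a one-sided (forward) difference
+     *
+     *   E_d(i) = (V(i) - V(i + e_d)) / dx_d
+     *
+     * which places the components at staggered half positions; in 2D the
+     * z component is set to zero.
+     */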
+ struct GetFieldEStencil
+ {
+ HDINLINE auto operator()(auto fieldIn) const
+ {
+            constexpr auto cellSize = sim.pic.getCellSize().shrink<simDim>();
+
+ float3_X eValue;
+ if constexpr(simDim == 3u)
+ {
+ constexpr DataSpace<3u> xDir(1, 0, 0);
+ constexpr DataSpace<3u> yDir(0, 1, 0);
+ constexpr DataSpace<3u> zDir(0, 0, 1);
+
+ eValue.x() = (*fieldIn - fieldIn(xDir)) / cellSize.x();
+ eValue.y() = (*fieldIn - fieldIn(yDir)) / cellSize.y();
+ eValue.z() = (*fieldIn - fieldIn(zDir)) / cellSize.z();
+
+ return eValue;
+ }
+ else if constexpr(simDim == 2u)
+ {
+                constexpr DataSpace<2u> xDir(1, 0);
+                constexpr DataSpace<2u> yDir(0, 1);
+
+ eValue.x() = (*fieldIn - fieldIn(xDir)) / cellSize.x();
+ eValue.y() = (*fieldIn - fieldIn(yDir)) / cellSize.y();
+ eValue.z() = 0.0_X;
+
+ return eValue;
+ }
+ }
+ };
+
+ struct Stencil
+ {
+ DINLINE auto operator()(
+ auto const& worker,
+ auto const mapper,
+ auto const stencilFunctor,
+ auto fieldOut,
+ auto fieldIn) const -> void
+ {
+            DataSpace<simDim> const superCellIdx(mapper.getSuperCellIndex(worker.blockDomIdxND()));
+            DataSpace<simDim> superCellCellOffset = superCellIdx * SuperCellSize::toRT();
+
+            using Type = typename decltype(fieldIn)::ValueType;
+            using BlockArea = pmacc::SuperCellDescription<
+                SuperCellSize,
+                typename pmacc::math::CT::make_Int<simDim, 1>::type,
+                typename pmacc::math::CT::make_Int<simDim, 1>::type>;
+
+            constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;
+
+            // cache the block area (supercell plus one-cell stencil margin) in shared
+            // memory because the stencil reads each cell multiple times
+            auto cache = pmacc::CachedBox::create<0, Type>(worker, BlockArea());
+            auto buffShifted = fieldIn.shift(superCellCellOffset);
+
+            /* makeThreadCollective is a convenience wrapper around the lockstep
+             * for-each that fills the cache including the margin cells and
+             * handles the origin offset of the block area
+             */
+            auto collective = pmacc::makeThreadCollective<BlockArea>();
+
+            pmacc::math::operation::Assign assign;
+            collective(worker, assign, cache, buffShifted);
+
+            worker.sync();
+
+            auto forEachCellInSupercell = lockstep::makeForEach<cellsPerSuperCell>(worker);
+
+            forEachCellInSupercell(
+                [&](int32_t const linearCellIdx)
+                {
+                    /* cell index within the superCell */
+                    DataSpace<simDim> const cellIdx = pmacc::math::mapToND(SuperCellSize::toRT(), linearCellIdx);
+                    fieldOut[superCellCellOffset + cellIdx] = stencilFunctor(cache.shift(cellIdx));
+                });
+ }
+ };
+
+ inline void stencil(auto mapper, auto const& functor, auto& outBuffer, auto& inBuffer)
+ {
+ // poisson stencil
+ PMACC_LOCKSTEP_KERNEL(fields::poissonSolver::Stencil{})
+ .config(
+ mapper.getGridDim(),
+ SuperCellSize{})(mapper, functor, outBuffer.getDataBox(), inBuffer.getDataBox());
+ }
+} // namespace picongpu::fields::poissonSolver
diff --git a/include/picongpu/main.x.cpp b/include/picongpu/main.x.cpp
index e4ff97b354..f31973e3df 100644
--- a/include/picongpu/main.x.cpp
+++ b/include/picongpu/main.x.cpp
@@ -44,25 +44,30 @@ int runSimulation(int argc, char** argv)
{
using namespace picongpu;
- auto sim = ::picongpu::
- SimulationStarter<::picongpu::InitialiserController, ::picongpu::PluginController, ::picongpu::Simulation>{};
- auto const parserStatus = sim.parseConfigs(argc, argv);
int errorCode = EXIT_FAILURE;
-
- switch(parserStatus)
{
- case ArgsParser::Status::error:
- errorCode = EXIT_FAILURE;
- break;
- case ArgsParser::Status::success:
- sim.load();
- sim.start();
- sim.unload();
- [[fallthrough]];
- case ArgsParser::Status::successExit:
- errorCode = 0;
- break;
- };
+ auto sim = ::picongpu::SimulationStarter<
+ ::picongpu::InitialiserController,
+ ::picongpu::PluginController,
+ ::picongpu::Simulation>{};
+ auto const parserStatus = sim.parseConfigs(argc, argv);
+
+
+ switch(parserStatus)
+ {
+ case ArgsParser::Status::error:
+ errorCode = EXIT_FAILURE;
+ break;
+ case ArgsParser::Status::success:
+ sim.load();
+ sim.start();
+ sim.unload();
+ [[fallthrough]];
+ case ArgsParser::Status::successExit:
+ errorCode = 0;
+ break;
+ };
+ }
    /* finalize the pmacc context */
pmacc::Environment<>::get().finalize();
diff --git a/include/picongpu/simulation/control/Simulation.hpp b/include/picongpu/simulation/control/Simulation.hpp
index 451c07d3b8..8af9173341 100644
--- a/include/picongpu/simulation/control/Simulation.hpp
+++ b/include/picongpu/simulation/control/Simulation.hpp
@@ -53,6 +53,7 @@
#include "picongpu/simulation/stage/ParticleInit.hpp"
#include "picongpu/simulation/stage/ParticleIonization.hpp"
#include "picongpu/simulation/stage/ParticlePush.hpp"
+#include "picongpu/simulation/stage/Poisson.hpp"
#include "picongpu/simulation/stage/RuntimeDensityFile.hpp"
#include "picongpu/simulation/stage/SynchrotronRadiation.hpp"
#include "picongpu/versionFormat.hpp"
@@ -153,6 +154,9 @@ namespace picongpu
fieldBackground.registerHelp(desc);
particleBoundaries.registerHelp(desc);
runtimeDensityFile.registerHelp(desc);
+
+            poissonSolver = std::make_shared<simulation::stage::Poisson>();
+ poissonSolver->registerHelp(desc);
}
virtual void startSimulation() override
@@ -348,6 +352,9 @@ namespace picongpu
// initialize runtime density file paths
runtimeDensityFile.init();
+ // create memory for poisson solver
+ poissonSolver->init(*cellDescription);
+
// create factory for the random number generator
uint32_t const userSeed = random::seed::ISeed{}();
uint32_t const seed = std::hash{}(std::to_string(userSeed));
@@ -490,6 +497,7 @@ namespace picongpu
initialiserController->init();
simulation::stage::ParticleInit{}(step);
(*atomicPhysics).fixAtomicStateInit(*cellDescription);
+ (*poissonSolver)(step);
// Check Debye resolution
particles::debyeLength::check(*cellDescription);
}
@@ -632,6 +640,8 @@ namespace picongpu
// Because of it, has a special init() method that has to be called during initialization of the simulation
simulation::stage::RuntimeDensityFile runtimeDensityFile;
+        std::shared_ptr<simulation::stage::Poisson> poissonSolver;
+
IInitPlugin* initialiserController{nullptr};
std::unique_ptr cellDescription;
diff --git a/include/picongpu/simulation/stage/Poisson.hpp b/include/picongpu/simulation/stage/Poisson.hpp
new file mode 100644
index 0000000000..4b5d9d98dd
--- /dev/null
+++ b/include/picongpu/simulation/stage/Poisson.hpp
@@ -0,0 +1,104 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldJ.hpp"
+#include "picongpu/fields/FieldJ.kernel"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+#include "picongpu/fields/currentDeposition/Deposit.hpp"
+#include "picongpu/fields/poissonSolver/FieldV.hpp"
+#include "picongpu/particles/filter/filter.hpp"
+#include "picongpu/particles/param.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+namespace picongpu::simulation::stage
+{
+    //! Functor for the stage of the PIC loop solving the Poisson equation for the initial electrostatic field
+ struct Poisson
+ {
+ Poisson();
+
+ void init(picongpu::MappingDesc const mappingDesc);
+
+ void registerHelp(po::options_description& desc);
+
+        /** Solve the Poisson equation for the charge density of all species
+         * and initialize the electric field from the resulting potential
+         *
+         * @param currentStep index of time iteration
+         */
+ void operator()(uint32_t const currentStep);
+
+        void preconditioner(
+            std::unique_ptr<GridBuffer<float_64, simDim>>& xBuffer,
+            std::unique_ptr<GridBuffer<float_64, simDim>>& bBuffer);
+
+ private:
+ void participate(bool status)
+ {
+ mpiReduce.participate(status);
+ }
+
+        template<typename T_ReduceFunc = pmacc::math::operation::Add>
+        auto reduceGlobal(DataSpace<simDim> fieldSize, auto dataBoxIn, T_ReduceFunc reduceFunctor = T_ReduceFunc{});
+
+        std::unique_ptr<GridBuffer<float_64, simDim>> pkBuffer;
+        std::unique_ptr<GridBuffer<float_64, simDim>> rkBuffer;
+        std::unique_ptr<GridBuffer<float_64, simDim>> r0Buffer;
+        std::unique_ptr<GridBuffer<float_64, simDim>> mpkBuffer;
+        std::unique_ptr<GridBuffer<float_64, simDim>> ampkBuffer;
+        std::unique_ptr<GridBuffer<float_64, simDim>> zkBuffer;
+        std::unique_ptr<GridBuffer<float_64, simDim>> azkBuffer;
+
+        std::unique_ptr<GridBuffer<float_64, simDim>> yBuffer;
+        std::unique_ptr<GridBuffer<float_64, simDim>> wBuffer;
+        std::unique_ptr<GridBuffer<float_64, simDim>> zBuffer;
+
+        std::shared_ptr<fields::poissonSolver::FieldV> fieldV;
+
+        std::optional<MappingDesc> m_mappingDesc;
+
+ mpi::MPIReduce mpiReduce;
+        std::unique_ptr<pmacc::device::Reduce> localReduce;
+
+ // defaults will be overwritten by command line arguments
+ bool m_useSolver = false;
+ uint32_t m_maxSolverSteps = 20;
+ float_64 m_solverEpsilon = 1.0e-8;
+
+ bool m_disablePreconditioner = false;
+ uint32_t m_maxPreconditionerSteps = 20;
+ };
+} // namespace picongpu::simulation::stage
diff --git a/include/picongpu/simulation/stage/Poisson.x.cpp b/include/picongpu/simulation/stage/Poisson.x.cpp
new file mode 100644
index 0000000000..bc92e90e98
--- /dev/null
+++ b/include/picongpu/simulation/stage/Poisson.x.cpp
@@ -0,0 +1,750 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "picongpu/simulation/stage/Poisson.hpp"
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldJ.hpp"
+#include "picongpu/fields/FieldJ.kernel"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+#include "picongpu/fields/currentDeposition/Deposit.hpp"
+#include "picongpu/fields/poissonSolver/BoundaryConditions.hpp"
+#include "picongpu/fields/poissonSolver/RightHandSideNormalization.hpp"
+#include "picongpu/fields/poissonSolver/Stencil.hpp"
+#include "picongpu/particles/filter/filter.hpp"
+#include "picongpu/particles/param.hpp"
+#include "picongpu/particles/particleToGrid/CombinedDerive.hpp"
+#include "picongpu/particles/particleToGrid/ComputeGridValuePerFrame.hpp"
+#include "picongpu/simulation/stage/Poisson.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+namespace picongpu::simulation::stage
+{
+    template<typename T_Func>
+    struct DeviceLambda
+    {
+        T_Func const func;
+
+        template<typename... T>
+        DEVICEONLY auto operator()(T&&... args) const
+        {
+            return func(std::forward<T>(args)...);
+        }
+
+        template<typename... T>
+        DEVICEONLY auto operator()(T&&... args)
+        {
+            return func(std::forward<T>(args)...);
+        }
+    };
+
+    template<typename T_Func>
+    DeviceLambda(T_Func const) -> DeviceLambda<T_Func>;
+} // namespace picongpu::simulation::stage
+
+namespace alpaka
+{
+    template<typename T_Func>
+    struct IsKernelArgumentTriviallyCopyable<picongpu::simulation::stage::DeviceLambda<T_Func>, void>
+        : std::true_type
+ {
+ };
+} // namespace alpaka
+
+namespace picongpu
+{
+ namespace simulation
+ {
+ namespace stage
+ {
+ namespace detail
+ {
+
+                template<typename T_SpeciesType, typename T_Area>
+                struct ComputeChargeDensity
+                {
+                    using SpeciesType = pmacc::particles::meta::FindByNameOrType_t<VectorAllSpecies, T_SpeciesType>;
+                    static uint32_t const area = T_Area::value;
+
+ HINLINE void operator()(FieldTmp& fieldTmp, uint32_t const currentStep) const
+ {
+ DataConnector& dc = Environment<>::get().DataConnector();
+
+ /* load species without copying the particle data to the host */
+                        auto speciesTmp = dc.get<SpeciesType>(SpeciesType::FrameType::getName());
+
+ /* run algorithm */
+ using ChargeDensitySolver = typename particles::particleToGrid::CreateFieldTmpOperation_t<
+ SpeciesType,
+ particles::particleToGrid::derivedAttributes::ChargeDensity>::Solver;
+
+                        computeFieldTmpValue<area, ChargeDensitySolver>(fieldTmp, *speciesTmp, currentStep);
+ }
+ };
+
+ } // namespace detail
+
+ namespace deriveField = particles::particleToGrid;
+            template<typename T_Species>
+            using SpeciesEligibleForChargeConservation = typename particles::traits::
+                SpeciesEligibleForSolver<T_Species, deriveField::derivedAttributes::ChargeDensity>::type;
+
+            Poisson::Poisson() : localReduce{std::make_unique<pmacc::device::Reduce>(1024)}
+ {
+ }
+
+ void Poisson::registerHelp(po::options_description& desc)
+ {
+ namespace po = boost::program_options;
+ po::options_description solverDesc("Poisson solver");
+
+                solverDesc.add_options()(
+                    "poisson.activate",
+                    po::value<bool>(&m_useSolver)->zero_tokens(),
+                    "enable poisson solver");
+                solverDesc.add_options()(
+                    "poisson.maxSteps",
+                    po::value<uint32_t>(&m_maxSolverSteps)->default_value(2000),
+                    "maximum number of steps for the solver");
+                solverDesc.add_options()(
+                    "poisson.epsilon",
+                    po::value<float_64>(&m_solverEpsilon)->default_value(1.0e-8),
+                    "maximal allowed error of the poisson solver");
+                // preconditioner
+                solverDesc.add_options()(
+                    "poisson.preconditioner.disable",
+                    po::value<bool>(&m_disablePreconditioner)->zero_tokens(),
+                    "disable poisson solver preconditioner");
+                solverDesc.add_options()(
+                    "poisson.preconditioner.maxSteps",
+                    po::value<uint32_t>(&m_maxPreconditionerSteps)->default_value(20),
+                    "maximum number of steps for the preconditioner");
+ desc.add(solverDesc);
+ }
+
+ void Poisson::init(MappingDesc const mappingDesc)
+ {
+ m_mappingDesc = std::make_optional(mappingDesc);
+
+ if(m_useSolver)
+ {
+                    pkBuffer = std::make_unique<GridBuffer<float_64, simDim>>(m_mappingDesc->getGridLayout());
+                    rkBuffer = std::make_unique<GridBuffer<float_64, simDim>>(m_mappingDesc->getGridLayout());
+                    r0Buffer = std::make_unique<GridBuffer<float_64, simDim>>(m_mappingDesc->getGridLayout());
+                    mpkBuffer = std::make_unique<GridBuffer<float_64, simDim>>(m_mappingDesc->getGridLayout());
+                    ampkBuffer = std::make_unique<GridBuffer<float_64, simDim>>(m_mappingDesc->getGridLayout());
+                    zkBuffer = std::make_unique<GridBuffer<float_64, simDim>>(m_mappingDesc->getGridLayout());
+                    azkBuffer = std::make_unique<GridBuffer<float_64, simDim>>(m_mappingDesc->getGridLayout());
+                    fieldV = std::make_shared<fields::poissonSolver::FieldV>(m_mappingDesc.value());
+
+                    yBuffer = std::make_unique<GridBuffer<float_64, simDim>>(m_mappingDesc->getGridLayout());
+                    wBuffer = std::make_unique<GridBuffer<float_64, simDim>>(m_mappingDesc->getGridLayout());
+                    zBuffer = std::make_unique<GridBuffer<float_64, simDim>>(m_mappingDesc->getGridLayout());
+
+ auto const commTag0 = pmacc::traits::getUniqueId();
+ auto const commTag1 = pmacc::traits::getUniqueId();
+ auto const commTagPk = pmacc::traits::getUniqueId();
+ auto const commTagRk = pmacc::traits::getUniqueId();
+ auto const commTagFieldV = pmacc::traits::getUniqueId();
+
+ auto const commTagY = pmacc::traits::getUniqueId();
+ /*go over all directions*/
+                    for(uint32_t i = 1; i < NumberOfExchanges<simDim>::value; ++i)
+                    {
+                        // set communication only for planes
+                        if(FRONT % i == 0)
+                        {
+                            DataSpace<simDim> relativeMask = Mask::getRelativeDirections<simDim>(i);
+                            /* guarding cells depend on direction
+                             * for negative direction use originGuard else endGuard (relative direction ZERO is
+                             * ignored) don't switch end and origin because this is a read buffer and no send buffer
+                             */
+                            auto guardingCells = DataSpace<simDim>::create(0);
+                            for(uint32_t d = 0; d < simDim; ++d)
+                                guardingCells[d] = (relativeMask[d] == 0 ? 0 : 1);
+ mpkBuffer->addExchange(GUARD, i, guardingCells, commTag0);
+ zkBuffer->addExchange(GUARD, i, guardingCells, commTag1);
+ pkBuffer->addExchange(GUARD, i, guardingCells, commTagPk);
+ rkBuffer->addExchange(GUARD, i, guardingCells, commTagRk);
+ fieldV->fieldVBuffer->addExchange(GUARD, i, guardingCells, commTagFieldV);
+
+ yBuffer->addExchange(GUARD, i, guardingCells, commTagY);
+ }
+ }
+ DataConnector& dc = Environment<>::get().DataConnector();
+ dc.share(fieldV);
+
+ participate(true);
+ }
+ }
+
+            template<typename T_TransformFunctor>
+            class TransformDataBox : private T_TransformFunctor
+            {
+            public:
+                using ValueType = decltype(std::declval<T_TransformFunctor>()(DataSpace<simDim>::create(0)));
+
+                static constexpr std::uint32_t Dim = simDim;
+
+                HDINLINE TransformDataBox() = default;
+
+                HDINLINE TransformDataBox(T_TransformFunctor transformFunc) : T_TransformFunctor(transformFunc)
+                {
+                }
+
+                HDINLINE TransformDataBox(TransformDataBox const&) = default;
+
+                HDINLINE ValueType operator()(DataSpace<simDim> const& idx) const
+                {
+                    return T_TransformFunctor::operator()(idx + m_offset);
+                }
+
+                HDINLINE ValueType operator[](DataSpace<simDim> const idx) const
+                {
+                    return T_TransformFunctor::operator()(idx + m_offset);
+                }
+
+                HDINLINE TransformDataBox shift(DataSpace<simDim> const& offset) const
+                {
+                    TransformDataBox result(*this);
+                    result.m_offset += offset;
+                    return result;
+                }
+
+                DataSpace<simDim> m_offset = DataSpace<simDim>::create(0);
+            };
+
+            template<typename T_ReduceFunc>
+            inline auto Poisson::reduceGlobal(
+                DataSpace<simDim> fieldSize,
+                auto dataBoxIn,
+                T_ReduceFunc reduceFunctor)
+ {
+ DataBoxDim1Access d1Access(dataBoxIn, fieldSize);
+
+ float_64 resultLocal = (*localReduce)(reduceFunctor, d1Access, fieldSize.productOfComponents());
+
+ // avoid deadlock between not finished pmacc tasks and mpi blocking collectives
+ eventSystem::getTransactionEvent().waitForFinished();
+ float_64 resultGlobal;
+ mpiReduce(reduceFunctor, &resultGlobal, &resultLocal, 1, mpi::reduceMethods::AllReduce());
+
+ return resultGlobal;
+ }
+
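+            /** Apply a functor cell-wise: out[i] = func(out[i], boxes[i]...)
+             *
+             * Generic element-wise kernel used for all axpy-like vector updates
+             * of the solver below.
+             */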
+ struct ForEachKernel
+ {
+ DINLINE auto operator()(
+ auto const& worker,
+ auto const& mapper,
+ auto const& func,
+ auto outBox,
+ auto... boxes) const -> void
+ {
+                    DataSpace<simDim> const superCellIdx(mapper.getSuperCellIndex(worker.blockDomIdxND()));
+                    DataSpace<simDim> superCellCellOffset = superCellIdx * SuperCellSize::toRT();
+
+                    constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;
+
+                    auto forEachCellInSupercell = lockstep::makeForEach<cellsPerSuperCell>(worker);
+
+                    forEachCellInSupercell(
+                        [&](int32_t const linearCellIdx)
+                        {
+                            DataSpace<simDim> const cellIdx
+                                = pmacc::math::mapToND(SuperCellSize::toRT(), linearCellIdx);
+                            DataSpace<simDim> const dataCellOffset = superCellCellOffset + cellIdx;
+ outBox[dataCellOffset] = func(outBox[dataCellOffset], boxes[dataCellOffset]...);
+ });
+ }
+ };
+
+ inline void forEachCell(auto mapper, auto const& functor, auto& outBuffer, auto&... buffers)
+ {
+ PMACC_LOCKSTEP_KERNEL(ForEachKernel{})
+ .config(mapper.getGridDim(), SuperCellSize{})(
+ mapper,
+ DeviceLambda{functor},
+ outBuffer.getDataBox(),
+ buffers.getDataBox()...);
+ }
+
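+            /** Chebyshev semi-iteration used as preconditioner
+             *
+             * Approximates x = A^-1 b with a fixed number of Chebyshev iterations
+             * based on estimated extremal eigenvalues of A; it needs no inner
+             * products, so apart from halo exchanges it runs communication-free.
+             */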
+            void Poisson::preconditioner(
+                std::unique_ptr<GridBuffer<float_64, simDim>>& xBuffer,
+                std::unique_ptr<GridBuffer<float_64, simDim>>& bBuffer)
+ {
+ yBuffer->getDeviceBuffer().setValue(0.0);
+ wBuffer->getDeviceBuffer().setValue(0.0);
+ zBuffer->getDeviceBuffer().setValue(0.0);
+
+                SubGrid<simDim> const& subGrid = Environment<simDim>::get().SubGrid();
+ auto globalDomain = subGrid.getGlobalDomain().size;
+ auto cellSizeSquared = sim.pic.getCellSize() * sim.pic.getCellSize();
+
+ float_64 eigenMin = 0.0;
+ float_64 eigenMax = 0.0;
+
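+                /* Extremal eigenvalues of the discrete Dirichlet Laplacian,
+                 *   lambda_k = sum_d (4 / dx_d^2) * sin^2(k_d * pi / (2 (N_d + 1))),
+                 * evaluated at k_d = 1 (smallest) and k_d = N_d (largest); theta and
+                 * delta below are the midpoint and half-width of this spectrum, from
+                 * which the Chebyshev recurrence rho_k = 1 / (2 sigma - rho_{k-1}) follows.
+                 */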
+                for(uint32_t d = 0; d < simDim; ++d)
+                {
+                    eigenMin += 4.0
+                        * math::sin(1.0 * pmacc::math::Pi<float_64>::halfValue / (globalDomain[d] + 1))
+                        * math::sin(1.0 * pmacc::math::Pi<float_64>::halfValue / (globalDomain[d] + 1))
+                        / (cellSizeSquared[d]);
+
+                    eigenMax += 4.0
+                        * math::sin(globalDomain[d] * pmacc::math::Pi<float_64>::halfValue / (globalDomain[d] + 1))
+                        * math::sin(globalDomain[d] * pmacc::math::Pi<float_64>::halfValue / (globalDomain[d] + 1))
+                        / (cellSizeSquared[d]);
+                }
+
+ float_64 const theta = 0.5 * (eigenMax + eigenMin);
+ float_64 const delta = 0.5 * (eigenMax - eigenMin);
+ float_64 const sigma = theta / delta;
+
+ float_64 rhoOld = 1. / sigma;
+ float_64 rhoCurrent = 1. / (2. * sigma - rhoOld);
+
+ bBuffer->communication();
+
+                auto coreBorderMapper = makeAreaMapper<CORE + BORDER>(m_mappingDesc.value());
+
+ namespace poi = fields::poissonSolver;
+
+ forEachCell(
+ coreBorderMapper,
+ [theta] DEVICEONLY(auto, auto const bValue) -> float_64 { return bValue / theta; },
+ zBuffer->getDeviceBuffer(),
+ bBuffer->getDeviceBuffer());
+
+ poi::stencil(
+ coreBorderMapper,
+ poi::StencilFunc{},
+ yBuffer->getDeviceBuffer(),
+ bBuffer->getDeviceBuffer());
+
+ forEachCell(
+ coreBorderMapper,
+ [rhoCurrent, theta, delta] DEVICEONLY(auto const yValue) -> float_64
+ { return -2.0 * rhoCurrent * yValue / theta / delta; },
+ yBuffer->getDeviceBuffer());
+
+ forEachCell(
+ coreBorderMapper,
+ [rhoCurrent, delta] DEVICEONLY(auto const yValue, auto const bValue) -> float_64
+ { return yValue + 4.0 * bValue * rhoCurrent / delta; },
+ yBuffer->getDeviceBuffer(),
+ bBuffer->getDeviceBuffer());
+
+
+ uint32_t iterMax = m_maxPreconditionerSteps;
+ for(uint32_t i = 2; i < iterMax; ++i)
+ {
+ rhoOld = rhoCurrent;
+ rhoCurrent = 1. / (2. * sigma - rhoOld);
+ yBuffer->communication();
+
+ poi::stencil(
+ coreBorderMapper,
+ poi::StencilFunc{},
+ wBuffer->getDeviceBuffer(),
+ yBuffer->getDeviceBuffer());
+
+ forEachCell(
+ coreBorderMapper,
+ [rhoCurrent, delta] DEVICEONLY(auto const wValue) -> float_64
+ { return -2.0 * rhoCurrent * wValue / delta; },
+ wBuffer->getDeviceBuffer());
+
+ forEachCell(
+ coreBorderMapper,
+ [rhoCurrent, delta] DEVICEONLY(auto const wValue, auto const bValue) -> float_64
+ { return wValue + 2.0 * rhoCurrent * bValue / delta; },
+ wBuffer->getDeviceBuffer(),
+ bBuffer->getDeviceBuffer());
+
+ forEachCell(
+ coreBorderMapper,
+ [rhoCurrent, sigma] DEVICEONLY(auto const wValue, auto const yValue) -> float_64
+ { return wValue + 2.0 * rhoCurrent * sigma * yValue; },
+ wBuffer->getDeviceBuffer(),
+ yBuffer->getDeviceBuffer());
+
+ forEachCell(
+ coreBorderMapper,
+ [rhoCurrent, rhoOld] DEVICEONLY(auto const wValue, auto const zValue) -> float_64
+ { return wValue - rhoCurrent * rhoOld * zValue; },
+ wBuffer->getDeviceBuffer(),
+ zBuffer->getDeviceBuffer());
+
+ zBuffer->getDeviceBuffer().copyFrom(yBuffer->getDeviceBuffer());
+ yBuffer->getDeviceBuffer().copyFrom(wBuffer->getDeviceBuffer());
+ } // loop
+ xBuffer->getDeviceBuffer().copyFrom(wBuffer->getDeviceBuffer());
+ }
+
+ void Poisson::operator()(uint32_t const currentStep)
+ {
+ namespace poi = fields::poissonSolver;
+
+ // only one rank is writing status initial information of the poisson solver
+                bool mainRank = Environment<simDim>::get().GridController().getGlobalRank() == 0;
+ if(mainRank)
+ log("Poisson solver:");
+ if(!m_useSolver)
+ {
+ if(mainRank)
+ log(" - disabled");
+ return;
+ }
+ eventSystem::getTransactionEvent().waitForFinished();
+ auto beginT = std::chrono::high_resolution_clock::now();
+
+ using namespace pmacc;
+                constexpr uint32_t fieldRhoSlot = 0;
+ DataConnector& dc = Environment<>::get().DataConnector();
+ auto& fieldRho = *dc.get(FieldTmp::getUniqueId(fieldRhoSlot));
+
+                DataSpace<simDim> numGuardCells = fieldRho.getGridLayout().guardSizeND();
+                DataSpace<simDim> coreBorderSize = fieldRho.getGridLayout().sizeWithoutGuardND();
+
+                using EligibleSpecies = pmacc::mp_filter<SpeciesEligibleForChargeConservation, VectorAllSpecies>;
+ /* calculate and add the charge density values from all species in FieldTmp */
+                meta::ForEach<
+                    EligibleSpecies,
+                    detail::ComputeChargeDensity<boost::mpl::_1, std::integral_constant<uint32_t, CORE + BORDER>>,
+                    boost::mpl::_1>
+                    computeChargeDensity;
+
+ fieldRho.getGridBuffer().getDeviceBuffer().setValue(FieldTmp::ValueType(0.0));
+ computeChargeDensity(fieldRho, currentStep);
+
+ /* add results of all species that are still in GUARD to next GPUs BORDER */
+ EventTask fieldTmpEvent = fieldRho.asyncCommunication(eventSystem::getTransactionEvent());
+ eventSystem::setTransactionEvent(fieldTmpEvent);
+
+ auto boundaryConditionsDirichlet = poi::BoundaryConditionsDirichlet{};
+ boundaryConditionsDirichlet(*fieldV.get(), m_mappingDesc.value());
+
+ auto rightHandSideNormalization = poi::RightHandSideNormalization{};
+ rightHandSideNormalization(*fieldV.get(), fieldRho, m_mappingDesc.value());
+
+
+ float_64 normRho;
+ {
+ auto rhoDeviceBox = fieldRho.getDeviceDataBox().shift(numGuardCells);
+ TransformDataBox fieldTransform(
+                        [rhoDeviceBox] DEVICEONLY(DataSpace<simDim> const& idx) -> float_64
+                        { return precisionCast<float_64>(rhoDeviceBox[idx].x() * rhoDeviceBox[idx].x()); });
+
+ normRho = std::sqrt(reduceGlobal(coreBorderSize, fieldTransform));
+ }
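+
+                /* Normalizing rho (and the Dirichlet values of V) by ||rho|| turns the
+                 * convergence threshold m_solverEpsilon into a relative criterion; the
+                 * potential is scaled back by normRho / eps0 once the solver converged.
+                 */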
+ // recalculate rho
+ fieldRho.getGridBuffer().getDeviceBuffer().setValue(FieldTmp::ValueType(0.0));
+ computeChargeDensity(fieldRho, currentStep);
+ /* add results of all species that are still in GUARD to next GPUs BORDER */
+ eventSystem::setTransactionEvent(fieldRho.asyncCommunication(eventSystem::getTransactionEvent()));
+
+                auto coreBorderMapper = makeAreaMapper<CORE + BORDER>(m_mappingDesc.value());
+
+ // normalize rho
+ forEachCell(
+ coreBorderMapper,
+ [normRho] DEVICEONLY(auto rhoValue) -> float_64 { return rhoValue.x() / normRho; },
+ fieldRho.getGridBuffer().getDeviceBuffer());
+
+                auto vMapper = makeAreaMapper<CORE + BORDER + GUARD>(m_mappingDesc.value());
+
+ // normalize v
+ forEachCell(
+ vMapper,
+ [normRho] DEVICEONLY(auto const vValue) -> float_64 { return vValue / normRho; },
+ fieldV->fieldVBuffer->getDeviceBuffer());
+
+ poi::stencil(
+ coreBorderMapper,
+ poi::StencilFunc{},
+ r0Buffer->getDeviceBuffer(),
+ fieldV->fieldVBuffer->getDeviceBuffer());
+
+ forEachCell(
+ coreBorderMapper,
+ [] DEVICEONLY(auto const r0Value, auto const rhoValue) -> float_64
+ { return rhoValue.x() - r0Value; },
+ r0Buffer->getDeviceBuffer(),
+ fieldRho.getGridBuffer().getDeviceBuffer());
+
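+                /* r0 = b - A x0 is the initial residual; BiCGStab keeps this "shadow"
+                 * residual fixed and reuses it in the (r0, .) inner products below,
+                 * while pk and rk are updated every iteration.
+                 */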
+ pkBuffer->getDeviceBuffer().copyFrom(r0Buffer->getDeviceBuffer());
+ rkBuffer->getDeviceBuffer().copyFrom(r0Buffer->getDeviceBuffer());
+
+ float_64 rho0;
+ /* rho reduction */
+ {
+ auto r0Box = r0Buffer->getDeviceBuffer().getDataBox();
+ auto r0BoxBorderGuard = r0Box.shift(numGuardCells);
+
+ TransformDataBox fieldTransform(
+                        [r0BoxBorderGuard] DEVICEONLY(DataSpace<simDim> const& idx) -> float_64
+ { return r0BoxBorderGuard[idx] * r0BoxBorderGuard[idx]; });
+
+ rho0 = reduceGlobal(coreBorderSize, fieldTransform);
+ }
+
+ float_64 rho1 = rho0;
+ float_64 totalSum2;
+
+ int maxIterations = m_maxSolverSteps;
+
+ bool foundSolution = false;
+ int iteration = 0;
+ for(; iteration < maxIterations; ++iteration)
+ {
+ // preconditioner
+ if(m_disablePreconditioner)
+ mpkBuffer->getDeviceBuffer().copyFrom(pkBuffer->getDeviceBuffer());
+ else
+ preconditioner(mpkBuffer, pkBuffer);
+
+ mpkBuffer->communication();
+
+ // w = Ap
+ poi::stencil(
+ coreBorderMapper,
+ poi::StencilFunc{},
+ ampkBuffer->getDeviceBuffer(),
+ mpkBuffer->getDeviceBuffer());
+
+ float_64 totalSum1;
+ /* local p = rw */
+ {
+ auto r0Box = r0Buffer->getDeviceBuffer().getDataBox();
+ auto r0BoxBorderGuard = r0Box.shift(numGuardCells);
+
+ auto ampkBox = ampkBuffer->getDeviceBuffer().getDataBox();
+ auto ampkBoxBorderGuard = ampkBox.shift(numGuardCells);
+
+ TransformDataBox fieldTransform(
+                            [r0BoxBorderGuard, ampkBoxBorderGuard] DEVICEONLY(DataSpace<simDim> const& idx) -> float_64
+ { return r0BoxBorderGuard[idx] * ampkBoxBorderGuard[idx]; });
+
+ totalSum1 = reduceGlobal(coreBorderSize, fieldTransform);
+ }
+ float_64 alpha = rho0 / totalSum1;
+ // r = r - alpha * w
+ forEachCell(
+ coreBorderMapper,
+ [alpha] DEVICEONLY(auto const rkValue, auto const ampkValue) -> float_64
+ { return rkValue - alpha * ampkValue; },
+ rkBuffer->getDeviceBuffer(),
+ ampkBuffer->getDeviceBuffer());
+
+ // preconditioner
+ if(m_disablePreconditioner)
+ zkBuffer->getDeviceBuffer().copyFrom(rkBuffer->getDeviceBuffer());
+ else
+ preconditioner(zkBuffer, rkBuffer);
+
+ zkBuffer->communication();
+
+ // t = A * r
+ poi::stencil(
+ coreBorderMapper,
+ poi::StencilFunc{},
+ azkBuffer->getDeviceBuffer(),
+ zkBuffer->getDeviceBuffer());
+
+ /* totalSum1 = azk * rk */
+ {
+ auto azkBox = azkBuffer->getDeviceBuffer().getDataBox();
+ auto azkBoxBorderGuard = azkBox.shift(numGuardCells);
+
+ auto rkBox = rkBuffer->getDeviceBuffer().getDataBox();
+ auto rkBoxBorderGuard = rkBox.shift(numGuardCells);
+
+ TransformDataBox fieldTransform(
+                            [azkBoxBorderGuard, rkBoxBorderGuard] DEVICEONLY(DataSpace<simDim> const& idx) -> float_64
+ { return azkBoxBorderGuard[idx] * rkBoxBorderGuard[idx]; });
+
+ totalSum1 = reduceGlobal(coreBorderSize, fieldTransform);
+ }
+
+                    /* totalSum2 = azk * azk */
+ {
+ auto azkBox = azkBuffer->getDeviceBuffer().getDataBox();
+ auto azkBoxBorderGuard = azkBox.shift(numGuardCells);
+
+ TransformDataBox fieldTransform(
+                            [azkBoxBorderGuard] DEVICEONLY(DataSpace<simDim> const& idx) -> float_64
+ { return azkBoxBorderGuard[idx] * azkBoxBorderGuard[idx]; });
+
+ totalSum2 = reduceGlobal(coreBorderSize, fieldTransform);
+ }
+
+ float_64 omega = totalSum1 / totalSum2;
+
+ // v = v + alpha * mpk + omega * zk
+ forEachCell(
+ coreBorderMapper,
+ [alpha,
+ omega] DEVICEONLY(auto const vValue, auto const mpkValue, auto const zkValue) -> float_64
+ { return vValue + alpha * mpkValue + omega * zkValue; },
+ fieldV->fieldVBuffer->getDeviceBuffer(),
+ mpkBuffer->getDeviceBuffer(),
+ zkBuffer->getDeviceBuffer());
+
+ // rk = rk - omega * azk
+ forEachCell(
+ coreBorderMapper,
+ [omega] DEVICEONLY(auto const rkValue, auto const azkValue) -> float_64
+ { return rkValue - omega * azkValue; },
+ rkBuffer->getDeviceBuffer(),
+ azkBuffer->getDeviceBuffer());
+
+ /* totalSum1 = r0 * rk */
+ {
+ auto r0Box = r0Buffer->getDeviceBuffer().getDataBox();
+ auto r0BoxBorderGuard = r0Box.shift(numGuardCells);
+
+ auto rkBox = rkBuffer->getDeviceBuffer().getDataBox();
+ auto rkBoxBorderGuard = rkBox.shift(numGuardCells);
+
+ TransformDataBox fieldTransform(
+                            [r0BoxBorderGuard, rkBoxBorderGuard] DEVICEONLY(DataSpace<simDim> const& idx) -> float_64
+ { return r0BoxBorderGuard[idx] * rkBoxBorderGuard[idx]; });
+
+ totalSum1 = reduceGlobal(coreBorderSize, fieldTransform);
+ }
+ /* totalSum2 = rk * rk */
+ {
+ auto rkBox = rkBuffer->getDeviceBuffer().getDataBox();
+ auto rkBoxBorderGuard = rkBox.shift(numGuardCells);
+
+ TransformDataBox fieldTransform(
+                            [rkBoxBorderGuard] DEVICEONLY(DataSpace<simDim> const& idx) -> float_64
+ { return rkBoxBorderGuard[idx] * rkBoxBorderGuard[idx]; });
+
+ totalSum2 = reduceGlobal(coreBorderSize, fieldTransform);
+ }
+
+ rho1 = totalSum1;
+ float_64 beta = rho1 / rho0 * alpha / omega;
+ rho0 = rho1;
+ if(std::sqrt(totalSum2) < m_solverEpsilon)
+ {
+ foundSolution = true;
+ break;
+ }
+ // pk = rk + beta * (pk - omega * ampk)
+ forEachCell(
+ coreBorderMapper,
+ [beta,
+ omega] DEVICEONLY(auto const pkValue, auto const rkValue, auto const ampkValue) -> float_64
+ { return rkValue + beta * (pkValue - omega * ampkValue); },
+ pkBuffer->getDeviceBuffer(),
+ rkBuffer->getDeviceBuffer(),
+ ampkBuffer->getDeviceBuffer());
+
+ } // for loop
+
+                // avoid deadlock between not finished pmacc tasks and blocking collective MPI operations
+ eventSystem::getTransactionEvent().waitForFinished();
+
+ bool ioRank = mpiReduce.hasResult(mpi::reduceMethods::Reduce());
+
+ int maxGlobalIterations = 0;
+ mpiReduce(
+ pmacc::math::operation::Max(),
+ &maxGlobalIterations,
+ &iteration,
+ 1,
+ mpi::reduceMethods::Reduce());
+
+ float_64 maxGlobalNormRho = 0.0;
+ mpiReduce(pmacc::math::operation::Max(), &maxGlobalNormRho, &normRho, 1, mpi::reduceMethods::Reduce());
+
+ float_64 maxGlobalTotalSum2 = 0.0;
+ mpiReduce(
+ pmacc::math::operation::Max(),
+ &maxGlobalTotalSum2,
+ &totalSum2,
+ 1,
+ mpi::reduceMethods::Reduce());
+
+ if(foundSolution)
+ {
+ if(ioRank)
+ log(" - converged after %1%/%2% iterations with norm=%3%, total epsilon=%4%")
+ % maxGlobalIterations % maxIterations % maxGlobalNormRho % std::sqrt(maxGlobalTotalSum2);
+
+ // normalize v back
+ forEachCell(
+ coreBorderMapper,
+ [normRho] DEVICEONLY(auto const vValue) -> float_64
+ { return vValue * normRho / sim.pic.getEps0(); },
+ fieldV->fieldVBuffer->getDeviceBuffer());
+ fieldV->fieldVBuffer->communication();
+
+ // compute fieldE
+ auto& eField = *dc.get(FieldE::getName());
+
+ poi::stencil(
+ coreBorderMapper,
+ poi::GetFieldEStencil{},
+ eField.getGridBuffer().getDeviceBuffer(),
+ fieldV->fieldVBuffer->getDeviceBuffer());
+                    eventSystem::setTransactionEvent(eField.asyncCommunication(eventSystem::getTransactionEvent()));
+ }
+
+ eventSystem::getTransactionEvent().waitForFinished();
+ auto endT = std::chrono::high_resolution_clock::now();
+                double duration = std::chrono::duration<double>(endT - beginT).count();
+
+                // avoid deadlock due to blocking collective MPI operation
+ eventSystem::getTransactionEvent().waitForFinished();
+ double globalMaxDuration = 0.0;
+ mpiReduce(
+ pmacc::math::operation::Max(),
+ &globalMaxDuration,
+ &duration,
+ 1,
+ mpi::reduceMethods::Reduce());
+ if(ioRank)
+ log(" - duration %1% sec") % globalMaxDuration;
+
+ if(ioRank && !foundSolution)
+ {
+ log(" - not converge after %1% iterations with norm=%2%, total epsilon=%3%")
+ % maxIterations % normRho % std::sqrt(rho1);
+ throw std::runtime_error("Poisson solver did not converge after max iterations");
+ }
+ }
+ } // namespace stage
+ } // namespace simulation
+} // namespace picongpu
diff --git a/include/pmacc/boundary/Utility.hpp b/include/pmacc/boundary/Utility.hpp
index f919f43016..7a97664781 100644
--- a/include/pmacc/boundary/Utility.hpp
+++ b/include/pmacc/boundary/Utility.hpp
@@ -33,7 +33,7 @@ namespace pmacc
*
* @param exchangeType number characterizing exchange @see pmacc::type::ExchangeType
*/
- HDINLINE bool isAxisAligned(uint32_t exchangeType)
+ constexpr bool isAxisAligned(uint32_t exchangeType)
{
return (FRONT % exchangeType == 0);
}
diff --git a/include/pmacc/device/Reduce.hpp b/include/pmacc/device/Reduce.hpp
index af6a40785b..80c82f9d56 100644
--- a/include/pmacc/device/Reduce.hpp
+++ b/include/pmacc/device/Reduce.hpp
@@ -57,6 +57,13 @@ namespace pmacc
{
}
+ void tmpBufferAlloc()
+ {
+ // lazy allocation of the result buffer
+ if(!reduceBuffer)
+                reduceBuffer = std::make_unique<GridBuffer<char, DIM1>>(DataSpace<DIM1>(byte));
+ }
+
/** Reduce elements in global gpu memory
*
* @param func binary functor for reduce which takes two arguments, first argument is the source and
@@ -89,8 +96,7 @@ namespace pmacc
threads = n;
// lazy allocation of the result buffer
-            if(!reduceBuffer)
-                reduceBuffer = std::make_unique<GridBuffer<char, DIM1>>(DataSpace<DIM1>(byte));
+ tmpBufferAlloc();
auto* dest = (Type*) reduceBuffer->getDeviceBuffer().data();
diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/fileOutput.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/fileOutput.param
new file mode 100644
index 0000000000..1b3793315b
--- /dev/null
+++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/fileOutput.param
@@ -0,0 +1,123 @@
+/* Copyright 2013-2024 Axel Huebl, Rene Widera, Felix Schmitt,
+ * Benjamin Worpitz, Richard Pausch, Pawel Ordyna
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <pmacc/meta/conversion/MakeSeq.hpp>
+
+/* some forward declarations we need */
+#include "picongpu/fields/Fields.def"
+#include "picongpu/particles/filter/filter.def"
+#include "picongpu/particles/particleToGrid/CombinedDerive.def"
+#include "picongpu/particles/particleToGrid/ComputeGridValuePerFrame.def"
+
+namespace picongpu
+{
+ /** FieldTmp output (calculated at runtime) *******************************
+ *
+ * Those operations derive scalar field quantities from particle species
+ * at runtime. Each value is mapped per cell. Some operations are identical
+ * up to a constant, so avoid writing those twice to save storage.
+ *
+ * you can choose any of these particle to grid projections:
+ * - Density: particle position + shape on the grid
+ * - BoundElectronDensity: density of bound electrons
+ * note: only makes sense for partially ionized ions
+ * - ChargeDensity: density * charge
+ * note: for species that do not change their charge state, this is
+ * the same as the density times a constant for the charge
+ * - Energy: sum of kinetic particle energy per cell with respect to shape
+ * - EnergyDensity: average kinetic particle energy per cell times the
+ * particle density
+ * note: this is the same as the sum of kinetic particle energy
+ * divided by a constant for the cell volume
+ * - Momentum: sum of chosen component of momentum per cell with respect to shape
+ * - MomentumDensity: average chosen component of momentum per cell times the particle density
+ * note: this is the same as the sum of the chosen momentum component
+ * divided by a constant for the cell volume
+ * - LarmorPower: radiated Larmor power
+ * (species must contain the attribute `momentumPrev1`)
+ *
+ * for debugging:
+ * - MidCurrentDensityComponent:
+ * density * charge * velocity_component
+ * - Counter: counts point like particles per cell
+ * - MacroCounter: counts point like macro particles per cell
+ *
+ * combined attributes:
+ * These attributes are defined as a function of two primary derived attributes.
+ *
+ * - AverageAttribute: template to compute a per particle average of any derived value
+ * - RelativisticDensity: equals to 1/gamma^2 * n. Where gamma is the Lorentz factor for the mean kinetic energy
+     * in the cell and n is the usual number density. Useful for Faraday Rotation calculation.
+ *
+ * Example use:
+ * @code{.cpp}
+     * using AverageEnergy_Seq = deriveField::CreateEligible_t<
+     *     VectorAllSpecies,
+     *     deriveField::combinedAttributes::AverageAttribute<deriveField::derivedAttributes::Energy>>;
+     * using RelativisticDensity_Seq
+     *     = deriveField::CreateEligible_t<VectorAllSpecies, deriveField::combinedAttributes::RelativisticDensity>;
+ * @endcode
+ *
+ * Filtering:
+ * You can create derived fields from filtered particles. Only particles passing
+ * the filter will contribute to the field quantity.
+ * For that you need to define your filters in `particleFilters.param` and pass a
+ * filter as the 3rd (optional) template argument in `CreateEligible_t`.
+ *
+ * Example: This will create charge density field for all species that are
+     * eligible for this attribute and the chosen filter.
+ * @code{.cpp}
+     * using ChargeDensity_Seq
+     *     = deriveField::CreateEligible_t<VectorAllSpecies,
+     *       deriveField::derivedAttributes::ChargeDensity, filter::FilterOfYourChoice>;
+ * @endcode
+ */
+ namespace deriveField = particles::particleToGrid;
+
+ /* ChargeDensity section */
+    using ChargeDensity_Seq
+        = deriveField::CreateEligible_t<VectorAllSpecies, deriveField::derivedAttributes::ChargeDensity>;
+
+ /** FieldTmpSolvers groups all solvers that create data for FieldTmp ******
+ *
+ * FieldTmpSolvers is used in @see FieldTmp to calculate the exchange size
+ */
+    using FieldTmpSolvers = MakeSeq_t<ChargeDensity_Seq>;
+
+
+ /** FileOutputFields: Groups all Fields that shall be dumped *************/
+
+ /** Possible native fields: FieldE, FieldB, FieldJ
+ */
+    using NativeFileOutputFields = MakeSeq_t<FieldE, FieldB>;
+
+    using FileOutputFields = MakeSeq_t<NativeFileOutputFields, FieldTmpSolvers>;
+
+
+ /** FileOutputParticles: Groups all Species that shall be dumped **********
+ *
+ * hint: to disable particle output set to
+ * using FileOutputParticles = MakeSeq_t< >;
+ */
+ using FileOutputParticles = MakeSeq_t<>;
+
+} // namespace picongpu
diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param
index 33e866d718..acbb710dfe 100644
--- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param
+++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param
@@ -48,20 +48,20 @@ namespace picongpu
*
* Vector is automatically reduced to two dimensions for 2D (x,y) simulations.
*/
- struct QuietParam
+ struct RandomParameter
{
- /** Count of macro-particles per cell per direction at initial state
+ /** Maximum number of macro-particles per cell during density profile evaluation.
*
- * unit: none */
- using numParticlesPerDimension = mCT::shrinkTo<
- mCT::Int,
- simDim>::type;
+ * Determines the weighting of a macro particle as well as the number of
+ * macro-particles which sample the evolution of the particle distribution
+ * function in phase space.
+ *
+ * unit: none
+ */
+ static constexpr uint32_t numParticlesPerCell = 25u;
};
- /** Definition of Quiet start position functor that positions macro-particles regularly on the grid.
- * No random number generator used.
- */
-        using Quiet = QuietImpl<QuietParam>;
+        using Random = RandomImpl<RandomParameter>;
} // namespace startPosition
diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param
index 7b6b819c38..12ccda19dc 100644
--- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param
+++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param
@@ -39,8 +39,8 @@ namespace picongpu
* the functors are called in order (from first to last functor)
*/
using InitPipeline = pmacc::mp_list<
- CreateDensity,
- Derive,
+ CreateDensity,
+ CreateDensity,
Manipulate,
Manipulate,
Manipulate,