diff --git a/include/picongpu/fields/Fields.def b/include/picongpu/fields/Fields.def
index 2744b6aac7..777c6e94d8 100644
--- a/include/picongpu/fields/Fields.def
+++ b/include/picongpu/fields/Fields.def
@@ -68,4 +68,8 @@ namespace picongpu
     /** Current Density j, @see FieldJ.hpp */
     class FieldJ;
 
+    namespace fields::poissonSolver
+    {
+        struct FieldV;
+    } // namespace fields::poissonSolver
 } // namespace picongpu
diff --git a/include/picongpu/fields/Fields.hpp b/include/picongpu/fields/Fields.hpp
index 8ab7dd9305..dee16e1fe7 100644
--- a/include/picongpu/fields/Fields.hpp
+++ b/include/picongpu/fields/Fields.hpp
@@ -25,3 +25,4 @@
 #include "picongpu/fields/FieldJ.hpp"
 #include "picongpu/fields/FieldTmp.hpp"
 #include "picongpu/fields/Fields.def"
+#include "picongpu/fields/poissonSolver/FieldV.hpp"
diff --git a/include/picongpu/fields/poissonSolver/BICGStab.hpp b/include/picongpu/fields/poissonSolver/BICGStab.hpp
new file mode 100644
index 0000000000..95dcac28fa
--- /dev/null
+++ b/include/picongpu/fields/poissonSolver/BICGStab.hpp
@@ -0,0 +1,35 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+
+struct BICGStab
+{
+    // return residual
+    // return number of iterations
+    void operator()(FieldTmp& fieldV, FieldTmp& fieldRho, MappingDesc* cellDescription)
+    {
+        // set boundary conditions on fieldV (Dirichlet or Neumann)
+
+        // normalize the problem based on norm(fieldRho)
+    }
+};
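BICGStab.hpp is only an interface stub at this point; the working, preconditioned solver lives in Poisson.x.cpp further down. For orientation, here is a minimal, unpreconditioned BiCGStab sketch for a serial 1D Poisson problem showing the update sequence the production code follows with its pk/rk/r0 buffers. All names are generic placeholders; none of this is PIConGPU API.

// bicgstab_sketch.cpp - unpreconditioned BiCGStab on the 1D problem
// -d2V/dx2 = rho with homogeneous Dirichlet boundaries (h = 1).
#include <cmath>
#include <cstdio>
#include <vector>

using Vec = std::vector<double>;

// y = A x with A the 1D negative Laplacian (tridiagonal 2, -1)
static Vec applyA(Vec const& x)
{
    std::size_t const n = x.size();
    Vec y(n, 0.0);
    for(std::size_t i = 0; i < n; ++i)
    {
        double const left = (i > 0) ? x[i - 1] : 0.0; // Dirichlet: V = 0 outside
        double const right = (i + 1 < n) ? x[i + 1] : 0.0;
        y[i] = 2.0 * x[i] - left - right;
    }
    return y;
}

static double dot(Vec const& a, Vec const& b)
{
    double s = 0.0;
    for(std::size_t i = 0; i < a.size(); ++i)
        s += a[i] * b[i];
    return s;
}

int main()
{
    std::size_t const n = 64;
    Vec b(n, 1.0), x(n, 0.0); // rho = 1 everywhere, initial guess V = 0

    Vec r = b; // r0 = b - A*x0 with x0 = 0
    Vec r0 = r, p = r;
    double rho0 = dot(r0, r0);

    for(int iter = 0; iter < 200; ++iter)
    {
        Vec const v = applyA(p);
        double const alpha = rho0 / dot(r0, v);
        Vec s(n);
        for(std::size_t i = 0; i < n; ++i)
            s[i] = r[i] - alpha * v[i];
        Vec const t = applyA(s);
        double const omega = dot(t, s) / dot(t, t);
        for(std::size_t i = 0; i < n; ++i)
        {
            x[i] += alpha * p[i] + omega * s[i];
            r[i] = s[i] - omega * t[i];
        }
        if(std::sqrt(dot(r, r)) < 1.0e-10)
        {
            std::printf("converged after %d iterations\n", iter + 1);
            break;
        }
        double const rho1 = dot(r0, r);
        double const beta = (rho1 / rho0) * (alpha / omega);
        for(std::size_t i = 0; i < n; ++i)
            p[i] = r[i] + beta * (p[i] - omega * v[i]);
        rho0 = rho1;
    }
    return 0;
}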
diff --git a/include/picongpu/fields/poissonSolver/BoundaryConditions.hpp b/include/picongpu/fields/poissonSolver/BoundaryConditions.hpp
new file mode 100644
index 0000000000..663b58538a
--- /dev/null
+++ b/include/picongpu/fields/poissonSolver/BoundaryConditions.hpp
@@ -0,0 +1,118 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+#include "picongpu/fields/poissonSolver/FieldV.hpp"
+
+#include 
+#include 
+#include 
+
+namespace picongpu::fields::poissonSolver
+{
+    struct SolutionFunction
+    {
+        HDINLINE auto operator()(math::Vector<float_64, simDim> const& totalCellCoordinate) const
+        {
+            if constexpr(simDim == 3u)
+            {
+                return math::sin(totalCellCoordinate.x()) + math::cos(totalCellCoordinate.y())
+                    + 3.0 * math::sin(totalCellCoordinate.z())
+                    + totalCellCoordinate.x() * totalCellCoordinate.productOfComponents() + 10.0;
+            }
+            else if constexpr(simDim == 2u)
+            {
+                return math::sin(totalCellCoordinate.x()) + math::cos(totalCellCoordinate.y())
+                    + totalCellCoordinate.x() * totalCellCoordinate.productOfComponents() + 10.0;
+            }
+        }
+    };
+
+    struct ApplyDirichletBCsFromFunctionKernel
+    {
+        DINLINE auto operator()(
+            auto const& worker,
+            auto fieldVBox,
+            auto const boundaryFunction,
+            DataSpace<simDim> cellOffsetToTotalOrigin,
+            auto const mapper) const -> void
+        {
+            // including guards
+            DataSpace<simDim> const superCellIdx(mapper.getSuperCellIndex(worker.blockDomIdxND()));
+
+            DataSpace<simDim> numGuardCells = mapper.getGuardingSuperCells() * SuperCellSize::toRT();
+
+            // no guards included
+            DataSpace<simDim> superCellTotalCellOffset
+                = cellOffsetToTotalOrigin + superCellIdx * SuperCellSize::toRT() - numGuardCells;
+
+            constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;
+
+            auto forEachCellInSupercell = lockstep::makeForEach<cellsPerSuperCell>(worker);
+
+            forEachCellInSupercell(
+                [&](int32_t const linearCellIdx)
+                {
+                    /* cell index within the superCell */
+                    DataSpace<simDim> const cellIdx = pmacc::math::mapToND(SuperCellSize::toRT(), linearCellIdx);
+                    // without guards
+                    DataSpace<simDim> const totalCellIdx = superCellTotalCellOffset + cellIdx;
+
+                    auto totalDistance = precisionCast<float_64>(totalCellIdx)
+                        * precisionCast<float_64>(sim.pic.getCellSize().shrink<simDim>());
+
+                    fieldVBox(superCellIdx * SuperCellSize::toRT() + cellIdx) = boundaryFunction(totalDistance);
+                });
+        }
+    };
+
+    struct BoundaryConditionsDirichlet
+    {
+        //! apply the analytic boundary function as Dirichlet values on all non-communicating faces
+        void operator()(FieldV& fieldV, MappingDesc cellDescription) const
+        {
+            SubGrid<simDim> const& subGrid = Environment<simDim>::get().SubGrid();
+            auto globalDomain = subGrid.getGlobalDomain();
+            auto localDomain = subGrid.getLocalDomain();
+
+            auto cellOffsetToTotalOrigin = globalDomain.offset + localDomain.offset;
+
+            for(uint32_t i = 1; i < NumberOfExchanges<simDim>::value; ++i)
+            {
+                /* only call for planes: left right top bottom back front */
+                if(FRONT % i == 0 && !(Environment<simDim>::get().GridController().getCommunicationMask().isSet(i)))
+                {
+                    ExchangeMapping<GUARD, MappingDesc> mapper(cellDescription, i);
+
+                    PMACC_LOCKSTEP_KERNEL(ApplyDirichletBCsFromFunctionKernel{})
+                        .config(mapper.getGridDim(), SuperCellSize{})(
+                            fieldV.fieldVBuffer->getDeviceBuffer().getDataBox(),
+                            SolutionFunction{},
+                            cellOffsetToTotalOrigin,
+                            mapper);
+                }
+            }
+        }
+    };
+} // namespace picongpu::fields::poissonSolver
diff --git a/include/picongpu/fields/poissonSolver/ChargeDeposition.hpp b/include/picongpu/fields/poissonSolver/ChargeDeposition.hpp
new file mode 100644
index 0000000000..e6b0bda3e4
--- /dev/null
+++ b/include/picongpu/fields/poissonSolver/ChargeDeposition.hpp
@@ -0,0 +1,20 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once diff --git a/include/picongpu/fields/poissonSolver/FieldV.hpp b/include/picongpu/fields/poissonSolver/FieldV.hpp new file mode 100644 index 0000000000..73e93a6d32 --- /dev/null +++ b/include/picongpu/fields/poissonSolver/FieldV.hpp @@ -0,0 +1,121 @@ +/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/defines.hpp" +#include "picongpu/fields/YeeCell.hpp" +#include "picongpu/traits/FieldPosition.hpp" +#include "picongpu/traits/SIBaseUnits.hpp" + +#include + +namespace picongpu::fields::poissonSolver +{ + struct FieldV : pmacc::ISimulationData + { + using ValueType = float_64; + + std::shared_ptr> fieldVBuffer; + + //! Unit type of field components + using UnitValueType = float1_64; + + using DataBoxType = pmacc::DataBox>; + + //! Number of components of ValueType, for serialization + static constexpr int numComponents = 1u; + + FieldV(picongpu::MappingDesc const mappingDesc) + : fieldVBuffer{std::make_shared>(mappingDesc.getGridLayout())} + { + } + + void synchronize() override + { + fieldVBuffer->getHostBuffer().copyFrom(fieldVBuffer->getDeviceBuffer()); + }; + + //! Get the host data box for the field values + DataBoxType getHostDataBox() + { + return fieldVBuffer->getHostBuffer().getDataBox(); + } + + //! Get the device data box for the field values + DataBoxType getDeviceDataBox() + { + return fieldVBuffer->getDeviceBuffer().getDataBox(); + } + + GridLayout getGridLayout() + { + return fieldVBuffer->getGridLayout(); + } + + /** + * Return the globally unique identifier for this simulation data. 
+ * + * @return globally unique identifier + */ + SimulationDataId getUniqueId() override + { + return "FieldV"; + }; + + static std::string getName() + { + return "FieldV"; + } + + static UnitValueType getUnit() + { + return UnitValueType{sim.unit.eField() * sim.unit.length()}; + } + + static std::vector getUnitDimension() + { + /* V is in volts: V = kg * m^2 / (A * s^3) + * -> L^2 * M * T^-3 * I^-1 + */ + std::vector unitDimension(7, 0.0); + unitDimension.at(SIBaseUnits::length) = 2.0; + unitDimension.at(SIBaseUnits::mass) = 1.0; + unitDimension.at(SIBaseUnits::time) = -3.0; + unitDimension.at(SIBaseUnits::electricCurrent) = -1.0; + return unitDimension; + } + }; +} // namespace picongpu::fields::poissonSolver + +namespace picongpu::traits +{ + template<> + struct FieldPosition + { + using VectorVectorDD = ::pmacc::math::Vector const; + + HDINLINE FieldPosition() = default; + + HDINLINE VectorVectorDD operator()() const + { + return VectorVectorDD::create(floatD_X::create(0.0)); + } + }; +} // namespace picongpu::traits diff --git a/include/picongpu/fields/poissonSolver/RightHandSideNormalization.hpp b/include/picongpu/fields/poissonSolver/RightHandSideNormalization.hpp new file mode 100644 index 0000000000..a5fe7d6840 --- /dev/null +++ b/include/picongpu/fields/poissonSolver/RightHandSideNormalization.hpp @@ -0,0 +1,95 @@ +/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+#include "picongpu/fields/poissonSolver/FieldV.hpp"
+
+#include 
+#include 
+#include 
+#include 
+
+namespace picongpu::fields::poissonSolver
+{
+    struct RightHandSideNormalizationKernel
+    {
+        DINLINE auto operator()(auto const& worker, auto const fieldVBox, auto fieldRhoBox, auto const mapper) const
+            -> void
+        {
+            DataSpace<simDim> const relExchangeDir = Mask::getRelativeDirections<simDim>(mapper.getExchangeType());
+            DataSpace<simDim> const superCellIdx(mapper.getSuperCellIndex(worker.blockDomIdxND()));
+            DataSpace<simDim> superCellCellOffset = superCellIdx * SuperCellSize::toRT();
+
+            DataSpace<simDim> numGuardCells = mapper.getGuardingSuperCells() * SuperCellSize::toRT();
+            DataSpace<simDim> adjustedSuperCellCellOffset = superCellCellOffset - (relExchangeDir * numGuardCells);
+
+            DataSpace<simDim> numCellsLocalDomain = mapper.getGridSuperCells() * SuperCellSize::toRT();
+
+            constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;
+
+            auto forEachCellInSupercell = lockstep::makeForEach<cellsPerSuperCell>(worker);
+
+            forEachCellInSupercell(
+                [&](int32_t const linearCellIdx)
+                {
+                    /* cell index within the superCell */
+                    DataSpace<simDim> const cellIdx = pmacc::math::mapToND(SuperCellSize::toRT(), linearCellIdx);
+
+                    DataSpace<simDim> const localCellIdx = superCellCellOffset + cellIdx;
+                    DataSpace<simDim> const dataCellIdx = adjustedSuperCellCellOffset + cellIdx;
+
+                    for(uint32_t d = 0; d < simDim; ++d)
+                    {
+                        if(relExchangeDir[d] != 0
+                           && (localCellIdx[d] == 0u || localCellIdx[d] == numCellsLocalDomain[d] - 1))
+                        {
+                            fieldRhoBox(dataCellIdx) += fieldVBox(dataCellIdx + relExchangeDir)
+                                / (sim.pic.getCellSize()[d] * sim.pic.getCellSize()[d]);
+                        }
+                    }
+                });
+        }
+    };
+
+    struct RightHandSideNormalization
+    {
+        //! fold the Dirichlet boundary values stored in fieldV into the right-hand side fieldRho
+        void operator()(FieldV& fieldV, FieldTmp& fieldRho, MappingDesc cellDescription)
+        {
+            /* only call for planes: left right top bottom back front */
+            for(uint32_t i = 1; i < NumberOfExchanges<simDim>::value; ++i)
+            {
+                if(FRONT % i == 0 && !(Environment<simDim>::get().GridController().getCommunicationMask().isSet(i)))
+                {
+                    ExchangeMapping<GUARD, MappingDesc> mapper(cellDescription, i);
+
+                    PMACC_LOCKSTEP_KERNEL(RightHandSideNormalizationKernel{})
+                        .config(mapper.getGridDim(), SuperCellSize{})(
+                            fieldV.fieldVBuffer->getDeviceBuffer().getDataBox(),
+                            fieldRho.getDeviceDataBox(),
+                            mapper);
+                }
+            }
+        }
+    };
+} // namespace picongpu::fields::poissonSolver
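RightHandSideNormalizationKernel applies the standard trick of folding known Dirichlet values into the right-hand side: at the first interior cell, the stencil term that would reference the fixed boundary potential is moved over to b. A 1D sketch with generic names (not PIConGPU API):

#include <cstdio>

// 1D discretization: (-V[i-1] + 2 V[i] - V[i+1]) / dx^2 = b[i]
// At the first interior cell (i = 0) the neighbour V[-1] = Vb is a known
// Dirichlet value, so it moves to the right-hand side:
//   (2 V[0] - V[1]) / dx^2 = b[0] + Vb / dx^2
// which mirrors the fieldRhoBox(...) += fieldVBox(...) / dx_d^2 update above.
double foldDirichletIntoRhs(double b0, double boundaryValue, double dx)
{
    return b0 + boundaryValue / (dx * dx);
}

int main()
{
    std::printf("adjusted b0 = %g\n", foldDirichletIntoRhs(1.0, 10.0, 0.5));
    return 0;
}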
diff --git a/include/picongpu/fields/poissonSolver/Stencil.hpp b/include/picongpu/fields/poissonSolver/Stencil.hpp
new file mode 100644
index 0000000000..4b98714241
--- /dev/null
+++ b/include/picongpu/fields/poissonSolver/Stencil.hpp
@@ -0,0 +1,147 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+#include "picongpu/fields/poissonSolver/FieldV.hpp"
+
+#include 
+#include 
+#include 
+
+namespace picongpu::fields::poissonSolver
+{
+    struct StencilFunc
+    {
+        HDINLINE auto operator()(auto fieldIn) const
+        {
+            constexpr auto cellSize = sim.pic.getCellSize().shrink<simDim>();
+            constexpr auto cellSizeSquared = cellSize * cellSize;
+            float_64 const fc0 = 2.0 * (1.0 / (cellSizeSquared)).sumOfComponents();
+            if constexpr(simDim == 3u)
+            {
+                constexpr DataSpace<3u> xDir(1, 0, 0);
+                constexpr DataSpace<3u> yDir(0, 1, 0);
+                constexpr DataSpace<3u> zDir(0, 0, 1);
+
+                return *fieldIn * fc0 - (fieldIn(-xDir) + fieldIn(xDir)) / cellSizeSquared.x()
+                    - (fieldIn(-yDir) + fieldIn(yDir)) / cellSizeSquared.y()
+                    - (fieldIn(-zDir) + fieldIn(zDir)) / cellSizeSquared.z();
+            }
+            else if constexpr(simDim == 2u)
+            {
+                constexpr DataSpace<2u> xDir(1, 0);
+                constexpr DataSpace<2u> yDir(0, 1);
+
+                return *fieldIn * fc0 - (fieldIn(-xDir) + fieldIn(xDir)) / cellSizeSquared.x()
+                    - (fieldIn(-yDir) + fieldIn(yDir)) / cellSizeSquared.y();
+            }
+        }
+    };
+
+    struct GetFieldEStencil
+    {
+        HDINLINE auto operator()(auto fieldIn) const
+        {
+            constexpr auto cellSize = sim.pic.getCellSize().shrink<simDim>();
+
+            float3_X eValue;
+            if constexpr(simDim == 3u)
+            {
+                constexpr DataSpace<3u> xDir(1, 0, 0);
+                constexpr DataSpace<3u> yDir(0, 1, 0);
+                constexpr DataSpace<3u> zDir(0, 0, 1);
+
+                eValue.x() = (*fieldIn - fieldIn(xDir)) / cellSize.x();
+                eValue.y() = (*fieldIn - fieldIn(yDir)) / cellSize.y();
+                eValue.z() = (*fieldIn - fieldIn(zDir)) / cellSize.z();
+
+                return eValue;
+            }
+            else if constexpr(simDim == 2u)
+            {
+                constexpr DataSpace<2u> xDir(1, 0);
+                constexpr DataSpace<2u> yDir(0, 1);
+
+                eValue.x() = (*fieldIn - fieldIn(xDir)) / cellSize.x();
+                eValue.y() = (*fieldIn - fieldIn(yDir)) / cellSize.y();
+                eValue.z() = 0.0_X;
+
+                return eValue;
+            }
+        }
+    };
+
+    struct Stencil
+    {
+        DINLINE auto operator()(
+            auto const& worker,
+            auto const mapper,
+            auto const stencilFunctor,
+            auto fieldOut,
+            auto fieldIn) const -> void
+        {
+            DataSpace<simDim> const superCellIdx(mapper.getSuperCellIndex(worker.blockDomIdxND()));
+            DataSpace<simDim> superCellCellOffset = superCellIdx * SuperCellSize::toRT();
+
+            using Type = typename decltype(fieldIn)::ValueType;
+            using BlockArea = pmacc::SuperCellDescription<
+                SuperCellSize,
+                typename pmacc::math::CT::make_Int<simDim, 1>::type,
+                typename pmacc::math::CT::make_Int<simDim, 1>::type>;
+
+            constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;
+
+            // use a cached buffer because each cell is read multiple times; this moves the blockArea
+            // into shared memory
+            auto cache = pmacc::CachedBox::create<0, Type>(worker, BlockArea());
+            auto buffShifted = fieldIn.shift(superCellCellOffset);
+
+            // the thread collective is a convenience wrapper around the lockstep for-each;
+            // it handles the guard offset by subtracting the origin offset
+            auto collective = pmacc::makeThreadCollective<BlockArea>();
+
+            pmacc::math::operation::Assign assign;
+            collective(worker, assign, cache, buffShifted);
+
+            worker.sync();
+
+            auto forEachCellInSupercell = lockstep::makeForEach<cellsPerSuperCell>(worker);
+
+            forEachCellInSupercell(
+                [&](int32_t const linearCellIdx)
+                {
+                    /* cell index within the superCell */
+                    DataSpace<simDim> const cellIdx = pmacc::math::mapToND(SuperCellSize::toRT(), linearCellIdx);
+                    fieldOut[superCellCellOffset + cellIdx] = stencilFunctor(cache.shift(cellIdx));
+                });
+        }
+    };
+
+    inline void stencil(auto mapper, auto const& functor, auto& outBuffer, auto& inBuffer)
+    {
+        // poisson stencil
+        PMACC_LOCKSTEP_KERNEL(fields::poissonSolver::Stencil{})
+            .config(
+                mapper.getGridDim(),
+                SuperCellSize{})(mapper, functor, outBuffer.getDataBox(), inBuffer.getDataBox());
+    }
+} // namespace picongpu::fields::poissonSolver
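Note the sign convention: StencilFunc applies the negative Laplacian, A·V = -∇²V, so the operator is positive definite and ρ/ε₀ can be used directly as the right-hand side. A quick serial check of the 2D five-point form, using plain doubles and a hypothetical helper (not PIConGPU API):

#include <cassert>

// 2D five-point negative Laplacian at one cell, mirroring StencilFunc:
// fc0 * V[c] - (V[x-1] + V[x+1]) / dx^2 - (V[y-1] + V[y+1]) / dy^2
double applyFivePoint(double c, double xm, double xp, double ym, double yp, double dx, double dy)
{
    double const fc0 = 2.0 * (1.0 / (dx * dx) + 1.0 / (dy * dy));
    return c * fc0 - (xm + xp) / (dx * dx) - (ym + yp) / (dy * dy);
}

int main()
{
    // For V(x, y) = x^2 at x = 0 (dx = dy = 1): neighbours along x are 1, 1,
    // along y constant 0. Expect -d2V/dx2 = -2, i.e. the negative Laplacian.
    assert(applyFivePoint(0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0) == -2.0);
    return 0;
}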
diff --git a/include/picongpu/main.x.cpp b/include/picongpu/main.x.cpp
index e4ff97b354..f31973e3df 100644
--- a/include/picongpu/main.x.cpp
+++ b/include/picongpu/main.x.cpp
@@ -44,25 +44,30 @@ int runSimulation(int argc, char** argv)
 {
     using namespace picongpu;
 
-    auto sim = ::picongpu::
-        SimulationStarter<::picongpu::InitialiserController, ::picongpu::PluginController, ::picongpu::Simulation>{};
-    auto const parserStatus = sim.parseConfigs(argc, argv);
     int errorCode = EXIT_FAILURE;
-
-    switch(parserStatus)
     {
-    case ArgsParser::Status::error:
-        errorCode = EXIT_FAILURE;
-        break;
-    case ArgsParser::Status::success:
-        sim.load();
-        sim.start();
-        sim.unload();
-        [[fallthrough]];
-    case ArgsParser::Status::successExit:
-        errorCode = 0;
-        break;
-    };
+        auto sim = ::picongpu::SimulationStarter<
+            ::picongpu::InitialiserController,
+            ::picongpu::PluginController,
+            ::picongpu::Simulation>{};
+        auto const parserStatus = sim.parseConfigs(argc, argv);
+
+
+        switch(parserStatus)
+        {
+        case ArgsParser::Status::error:
+            errorCode = EXIT_FAILURE;
+            break;
+        case ArgsParser::Status::success:
+            sim.load();
+            sim.start();
+            sim.unload();
+            [[fallthrough]];
+        case ArgsParser::Status::successExit:
+            errorCode = 0;
+            break;
+        };
+    }
 
     // finalize the pmacc context */
     pmacc::Environment<>::get().finalize();
diff --git a/include/picongpu/simulation/control/Simulation.hpp b/include/picongpu/simulation/control/Simulation.hpp
index 451c07d3b8..8af9173341 100644
--- a/include/picongpu/simulation/control/Simulation.hpp
+++ b/include/picongpu/simulation/control/Simulation.hpp
@@ -53,6 +53,7 @@
 #include "picongpu/simulation/stage/ParticleInit.hpp"
 #include "picongpu/simulation/stage/ParticleIonization.hpp"
 #include "picongpu/simulation/stage/ParticlePush.hpp"
+#include "picongpu/simulation/stage/Poisson.hpp"
 #include "picongpu/simulation/stage/RuntimeDensityFile.hpp"
 #include "picongpu/simulation/stage/SynchrotronRadiation.hpp"
 #include "picongpu/versionFormat.hpp"
@@ -153,6 +154,9 @@ namespace picongpu
             fieldBackground.registerHelp(desc);
             particleBoundaries.registerHelp(desc);
             runtimeDensityFile.registerHelp(desc);
+
+            poissonSolver = std::make_shared<simulation::stage::Poisson>();
+            poissonSolver->registerHelp(desc);
         }
 
         virtual void startSimulation() override
@@ -348,6 +352,9 @@ namespace picongpu
             // initialize runtime density file paths
             runtimeDensityFile.init();
 
+            // create memory for poisson solver
+            poissonSolver->init(*cellDescription);
+
             // create factory for the random number generator
             uint32_t const userSeed = random::seed::ISeed{}();
             uint32_t const seed = std::hash<std::string>{}(std::to_string(userSeed));
@@ -490,6 +497,7 @@ namespace picongpu
                 initialiserController->init();
                 simulation::stage::ParticleInit{}(step);
                 (*atomicPhysics).fixAtomicStateInit(*cellDescription);
+                (*poissonSolver)(step);
                 // Check Debye resolution
                 particles::debyeLength::check(*cellDescription);
             }
@@ -632,6 +640,8 @@ namespace picongpu
         // Because of it, has a special init() method that has to be called during initialization of the simulation
         simulation::stage::RuntimeDensityFile runtimeDensityFile;
 
+        std::shared_ptr<simulation::stage::Poisson> poissonSolver;
+
         IInitPlugin* initialiserController{nullptr};
 
         std::unique_ptr<MappingDesc> cellDescription;
diff --git a/include/picongpu/simulation/stage/Poisson.hpp b/include/picongpu/simulation/stage/Poisson.hpp
new file mode 100644
index 0000000000..4b5d9d98dd
--- /dev/null
+++ b/include/picongpu/simulation/stage/Poisson.hpp
@@ -0,0 +1,104 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldJ.hpp"
+#include "picongpu/fields/FieldJ.kernel"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+#include "picongpu/fields/currentDeposition/Deposit.hpp"
+#include "picongpu/fields/poissonSolver/FieldV.hpp"
+#include "picongpu/particles/filter/filter.hpp"
+#include "picongpu/particles/param.hpp"
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+
+namespace picongpu::simulation::stage
+{
+    //! Functor for the stage of the PIC loop computing the initial electrostatic field via a Poisson solve
+    struct Poisson
+    {
+        Poisson();
+
+        void init(picongpu::MappingDesc const mappingDesc);
+
+        void registerHelp(po::options_description& desc);
+
+        /** Deposit the charge density of all eligible species, solve Poisson's
+         * equation for the electrostatic potential and derive the initial
+         * electric field from it
+         *
+         * @param currentStep index of time iteration
+         */
+        void operator()(uint32_t const currentStep);
+
+        void preconditioner(
+            std::unique_ptr<pmacc::GridBuffer<float_64, simDim>>& xBuffer,
+            std::unique_ptr<pmacc::GridBuffer<float_64, simDim>>& bBuffer);
+
+    private:
+        void participate(bool status)
+        {
+            mpiReduce.participate(status);
+        }
+
+        template<typename T_ReduceFunc>
+        auto reduceGlobal(DataSpace<simDim> fieldSize, auto dataBoxIn, T_ReduceFunc reduceFunctor = T_ReduceFunc{});
+
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> pkBuffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> rkBuffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> r0Buffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> mpkBuffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> ampkBuffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> zkBuffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> azkBuffer;
+
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> yBuffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> wBuffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> zBuffer;
+
+        std::shared_ptr<fields::poissonSolver::FieldV> fieldV;
+
+        std::optional<MappingDesc> m_mappingDesc;
+
+        mpi::MPIReduce mpiReduce;
+        std::unique_ptr<pmacc::device::Reduce> localReduce;
+
+        // defaults will be overwritten by command line arguments
+        bool m_useSolver = false;
+        uint32_t m_maxSolverSteps = 20;
+        float_64 m_solverEpsilon = 1.0e-8;
+
+        bool m_disablePreconditioner = false;
+        uint32_t m_maxPreconditionerSteps = 20;
+    };
+} // namespace picongpu::simulation::stage
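The preconditioner() declared above implements a Chebyshev iteration, parametrized by bounds on the spectrum of the discrete negative Laplacian with Dirichlet boundaries: per axis the eigenvalues are λ_k = (4/h²)·sin²(kπ/(2(N+1))), k = 1…N, and the multi-dimensional bounds are the per-axis sums, matching the eigenMin/eigenMax loops in Poisson.x.cpp below. A standalone sketch with a generic helper (not PIConGPU API):

#include <cmath>
#include <cstdio>

// Extreme eigenvalues of the 1D negative Laplacian (Dirichlet boundaries,
// N interior cells, spacing h): lambda_k = 4/h^2 * sin^2(k*pi / (2*(N+1))).
void chebyshevBounds(int n, double h, double& eigenMin, double& eigenMax)
{
    double const piHalf = 2.0 * std::atan(1.0); // pi/2
    eigenMin = 4.0 * std::pow(std::sin(1.0 * piHalf / (n + 1)), 2) / (h * h);
    eigenMax = 4.0 * std::pow(std::sin(n * piHalf / (n + 1)), 2) / (h * h);
}

int main()
{
    double lo, hi;
    chebyshevBounds(256, 1.0, lo, hi);
    // theta and delta parametrize the Chebyshev polynomials on [lo, hi],
    // exactly as computed at the start of Poisson::preconditioner
    double const theta = 0.5 * (hi + lo), delta = 0.5 * (hi - lo);
    std::printf("lambda in [%g, %g], theta=%g delta=%g\n", lo, hi, theta, delta);
    return 0;
}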
diff --git a/include/picongpu/simulation/stage/Poisson.x.cpp b/include/picongpu/simulation/stage/Poisson.x.cpp
new file mode 100644
index 0000000000..bc92e90e98
--- /dev/null
+++ b/include/picongpu/simulation/stage/Poisson.x.cpp
@@ -0,0 +1,750 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "picongpu/simulation/stage/Poisson.hpp"
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldJ.hpp"
+#include "picongpu/fields/FieldJ.kernel"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+#include "picongpu/fields/currentDeposition/Deposit.hpp"
+#include "picongpu/fields/poissonSolver/BoundaryConditions.hpp"
+#include "picongpu/fields/poissonSolver/RightHandSideNormalization.hpp"
+#include "picongpu/fields/poissonSolver/Stencil.hpp"
+#include "picongpu/particles/filter/filter.hpp"
+#include "picongpu/particles/param.hpp"
+#include "picongpu/particles/particleToGrid/CombinedDerive.hpp"
+#include "picongpu/particles/particleToGrid/ComputeGridValuePerFrame.hpp"
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#include 
+
+namespace picongpu::simulation::stage
+{
+    template<typename T_Func>
+    struct DeviceLambda
+    {
+        T_Func const func;
+
+        template<typename... T>
+        DEVICEONLY auto operator()(T&&... args) const
+        {
+            return func(std::forward<T>(args)...);
+        }
+
+        template<typename... T>
+        DEVICEONLY auto operator()(T&&... args)
+        {
+            return func(std::forward<T>(args)...);
+        }
+    };
+
+    template<typename T_Func>
+    DeviceLambda(T_Func const) -> DeviceLambda<T_Func>;
+} // namespace picongpu::simulation::stage
+
+namespace alpaka
+{
+    template<typename T_Func>
+    struct IsKernelArgumentTriviallyCopyable<picongpu::simulation::stage::DeviceLambda<T_Func>, void>
+        : std::true_type
+    {
+    };
+} // namespace alpaka
+
+namespace picongpu
+{
+    namespace simulation
+    {
+        namespace stage
+        {
+            namespace detail
+            {
+                template<typename T_SpeciesType, typename T_Area>
+                struct ComputeChargeDensity
+                {
+                    using SpeciesType = pmacc::particles::meta::FindByNameOrType_t<VectorAllSpecies, T_SpeciesType>;
+                    static uint32_t const area = T_Area::value;
+
+                    HINLINE void operator()(FieldTmp& fieldTmp, uint32_t const currentStep) const
+                    {
+                        DataConnector& dc = Environment<>::get().DataConnector();
+
+                        /* load species without copying the particle data to the host */
+                        auto speciesTmp = dc.get<SpeciesType>(SpeciesType::FrameType::getName());
+
+                        /* run algorithm */
+                        using ChargeDensitySolver = typename particles::particleToGrid::CreateFieldTmpOperation_t<
+                            SpeciesType,
+                            particles::particleToGrid::derivedAttributes::ChargeDensity>::Solver;
+
+                        computeFieldTmpValue<area, ChargeDensitySolver>(fieldTmp, *speciesTmp, currentStep);
+                    }
+                };
+
+            } // namespace detail
+
+            namespace deriveField = particles::particleToGrid;
+            template<typename T_Species>
+            using SpeciesEligibleForChargeConservation = typename particles::traits::
+                SpeciesEligibleForSolver<T_Species, deriveField::derivedAttributes::ChargeDensity>::type;
+
+            Poisson::Poisson() : localReduce{std::make_unique<pmacc::device::Reduce>(1024)}
+            {
+            }
+
+            void Poisson::registerHelp(po::options_description& desc)
+            {
+                namespace po = boost::program_options;
+                po::options_description solverDesc("Poisson solver");
+
+                solverDesc.add_options()(
+                    "poisson.activate",
+                    po::value(&m_useSolver)->zero_tokens(),
+                    "enable poisson solver");
+                solverDesc.add_options()(
+                    "poisson.maxSteps",
+                    po::value(&m_maxSolverSteps)->default_value(2000),
+                    "maximum number of iterations of the solver");
+                solverDesc.add_options()(
+                    "poisson.epsilon",
+                    po::value(&m_solverEpsilon)->default_value(1.0e-8),
+                    "maximum allowed error of the poisson solver");
+                // preconditioner
+                solverDesc.add_options()(
+                    "poisson.preconditioner.disable",
po::value(&m_disablePreconditioner)->zero_tokens(), + "disable poisson solver preconditioner"); + solverDesc.add_options()( + "poisson.preconditioner.maxSteps", + po::value(&m_maxPreconditionerSteps)->default_value(20), + "maximum number of steps for the preconditioner"); + desc.add(solverDesc); + } + + void Poisson::init(MappingDesc const mappingDesc) + { + m_mappingDesc = std::make_optional(mappingDesc); + + if(m_useSolver) + { + pkBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + rkBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + r0Buffer = std::make_unique>(m_mappingDesc->getGridLayout()); + mpkBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + ampkBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + zkBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + azkBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + fieldV = std::make_shared(m_mappingDesc.value()); + + yBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + wBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + zBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + + auto const commTag0 = pmacc::traits::getUniqueId(); + auto const commTag1 = pmacc::traits::getUniqueId(); + auto const commTagPk = pmacc::traits::getUniqueId(); + auto const commTagRk = pmacc::traits::getUniqueId(); + auto const commTagFieldV = pmacc::traits::getUniqueId(); + + auto const commTagY = pmacc::traits::getUniqueId(); + /*go over all directions*/ + for(uint32_t i = 1; i < NumberOfExchanges::value; ++i) + { + // set communication only for planes + if(FRONT % i == 0) + { + DataSpace relativeMask = Mask::getRelativeDirections(i); + /* guarding cells depend on direction + * for negative direction use originGuard else endGuard (relative direction ZERO is + * ignored) don't switch end and origin because this is a read buffer and no send buffer + */ + auto guardingCells = DataSpace::create(0); + for(uint32_t d = 0; d < simDim; ++d) + guardingCells[d] = (relativeMask[d] == 0 ? 
0 : 1); + mpkBuffer->addExchange(GUARD, i, guardingCells, commTag0); + zkBuffer->addExchange(GUARD, i, guardingCells, commTag1); + pkBuffer->addExchange(GUARD, i, guardingCells, commTagPk); + rkBuffer->addExchange(GUARD, i, guardingCells, commTagRk); + fieldV->fieldVBuffer->addExchange(GUARD, i, guardingCells, commTagFieldV); + + yBuffer->addExchange(GUARD, i, guardingCells, commTagY); + } + } + DataConnector& dc = Environment<>::get().DataConnector(); + dc.share(fieldV); + + participate(true); + } + } + + template + class TransformDataBox : private T_TranformFunctor + { + public: + using ValueType = decltype(std::declval()(DataSpace::create(0))); + + static constexpr std::uint32_t Dim = simDim; + + HDINLINE TransformDataBox() = default; + + HDINLINE TransformDataBox(T_TranformFunctor transformFunc) : T_TranformFunctor(transformFunc) + { + } + + HDINLINE TransformDataBox(TransformDataBox const&) = default; + + HDINLINE ValueType operator()(DataSpace const& idx) const + { + return T_TranformFunctor::operator()(idx + m_offset); + } + + HDINLINE ValueType operator[](DataSpace const idx) const + { + return T_TranformFunctor::operator()(idx + m_offset); + } + + HDINLINE TransformDataBox shift(DataSpace const& offset) const + { + TransformDataBox result(*this); + result.m_offset += offset; + return result; + } + + DataSpace m_offset = DataSpace::create(0); + }; + + template + inline auto Poisson::reduceGlobal( + DataSpace fieldSize, + auto dataBoxIn, + T_TranformFunctor reduceFunctor) + { + DataBoxDim1Access d1Access(dataBoxIn, fieldSize); + + float_64 resultLocal = (*localReduce)(reduceFunctor, d1Access, fieldSize.productOfComponents()); + + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + eventSystem::getTransactionEvent().waitForFinished(); + float_64 resultGlobal; + mpiReduce(reduceFunctor, &resultGlobal, &resultLocal, 1, mpi::reduceMethods::AllReduce()); + + return resultGlobal; + } + + struct ForEachKernel + { + DINLINE auto operator()( + auto const& worker, + auto const& mapper, + auto const& func, + auto outBox, + auto... boxes) const -> void + { + DataSpace const superCellIdx(mapper.getSuperCellIndex(worker.blockDomIdxND())); + DataSpace superCellCellOffset = superCellIdx * SuperCellSize::toRT(); + + constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume::type::value; + + auto forEachCellInSupercell = lockstep::makeForEach(worker); + + forEachCellInSupercell( + [&](int32_t const linearCellIdx) + { + DataSpace const cellIdx + = pmacc::math::mapToND(SuperCellSize::toRT(), linearCellIdx); + DataSpace const dataCellOffset = superCellCellOffset + cellIdx; + outBox[dataCellOffset] = func(outBox[dataCellOffset], boxes[dataCellOffset]...); + }); + } + }; + + inline void forEachCell(auto mapper, auto const& functor, auto& outBuffer, auto&... 
buffers) + { + PMACC_LOCKSTEP_KERNEL(ForEachKernel{}) + .config(mapper.getGridDim(), SuperCellSize{})( + mapper, + DeviceLambda{functor}, + outBuffer.getDataBox(), + buffers.getDataBox()...); + } + + void Poisson::preconditioner( + std::unique_ptr>& xBuffer, + std::unique_ptr>& bBuffer) + { + yBuffer->getDeviceBuffer().setValue(0.0); + wBuffer->getDeviceBuffer().setValue(0.0); + zBuffer->getDeviceBuffer().setValue(0.0); + + SubGrid const& subGrid = Environment::get().SubGrid(); + auto globalDomain = subGrid.getGlobalDomain().size; + auto cellSizeSquared = sim.pic.getCellSize() * sim.pic.getCellSize(); + + float_64 eigenMin = 0.0; + float_64 eigenMax = 0.0; + + for(uint32_t d = 0; d < simDim; ++d) + { + eigenMin += 4.0 * math::sin(1.0 * pmacc::math::Pi::halfValue / (globalDomain[d] + 1)) + * math::sin(1.0 * pmacc::math::Pi::halfValue / (globalDomain[d] + 1)) + / (cellSizeSquared[d]); + + eigenMax + += 4.0 + * math::sin(globalDomain[d] * pmacc::math::Pi::halfValue / (globalDomain[d] + 1)) + * math::sin(globalDomain[d] * pmacc::math::Pi::halfValue / (globalDomain[d] + 1)) + / (cellSizeSquared[d]); + } + + float_64 const theta = 0.5 * (eigenMax + eigenMin); + float_64 const delta = 0.5 * (eigenMax - eigenMin); + float_64 const sigma = theta / delta; + + float_64 rhoOld = 1. / sigma; + float_64 rhoCurrent = 1. / (2. * sigma - rhoOld); + + bBuffer->communication(); + + auto coreBorderMapper = makeAreaMapper(m_mappingDesc.value()); + + namespace poi = fields::poissonSolver; + + forEachCell( + coreBorderMapper, + [theta] DEVICEONLY(auto, auto const bValue) -> float_64 { return bValue / theta; }, + zBuffer->getDeviceBuffer(), + bBuffer->getDeviceBuffer()); + + poi::stencil( + coreBorderMapper, + poi::StencilFunc{}, + yBuffer->getDeviceBuffer(), + bBuffer->getDeviceBuffer()); + + forEachCell( + coreBorderMapper, + [rhoCurrent, theta, delta] DEVICEONLY(auto const yValue) -> float_64 + { return -2.0 * rhoCurrent * yValue / theta / delta; }, + yBuffer->getDeviceBuffer()); + + forEachCell( + coreBorderMapper, + [rhoCurrent, delta] DEVICEONLY(auto const yValue, auto const bValue) -> float_64 + { return yValue + 4.0 * bValue * rhoCurrent / delta; }, + yBuffer->getDeviceBuffer(), + bBuffer->getDeviceBuffer()); + + + uint32_t iterMax = m_maxPreconditionerSteps; + for(uint32_t i = 2; i < iterMax; ++i) + { + rhoOld = rhoCurrent; + rhoCurrent = 1. / (2. 
* sigma - rhoOld); + yBuffer->communication(); + + poi::stencil( + coreBorderMapper, + poi::StencilFunc{}, + wBuffer->getDeviceBuffer(), + yBuffer->getDeviceBuffer()); + + forEachCell( + coreBorderMapper, + [rhoCurrent, delta] DEVICEONLY(auto const wValue) -> float_64 + { return -2.0 * rhoCurrent * wValue / delta; }, + wBuffer->getDeviceBuffer()); + + forEachCell( + coreBorderMapper, + [rhoCurrent, delta] DEVICEONLY(auto const wValue, auto const bValue) -> float_64 + { return wValue + 2.0 * rhoCurrent * bValue / delta; }, + wBuffer->getDeviceBuffer(), + bBuffer->getDeviceBuffer()); + + forEachCell( + coreBorderMapper, + [rhoCurrent, sigma] DEVICEONLY(auto const wValue, auto const yValue) -> float_64 + { return wValue + 2.0 * rhoCurrent * sigma * yValue; }, + wBuffer->getDeviceBuffer(), + yBuffer->getDeviceBuffer()); + + forEachCell( + coreBorderMapper, + [rhoCurrent, rhoOld] DEVICEONLY(auto const wValue, auto const zValue) -> float_64 + { return wValue - rhoCurrent * rhoOld * zValue; }, + wBuffer->getDeviceBuffer(), + zBuffer->getDeviceBuffer()); + + zBuffer->getDeviceBuffer().copyFrom(yBuffer->getDeviceBuffer()); + yBuffer->getDeviceBuffer().copyFrom(wBuffer->getDeviceBuffer()); + } // loop + xBuffer->getDeviceBuffer().copyFrom(wBuffer->getDeviceBuffer()); + } + + void Poisson::operator()(uint32_t const currentStep) + { + namespace poi = fields::poissonSolver; + + // only one rank is writing status initial information of the poisson solver + bool mainRank = Environment::get().GridController().getGlobalRank() == 0; + if(mainRank) + log("Poisson solver:"); + if(!m_useSolver) + { + if(mainRank) + log(" - disabled"); + return; + } + eventSystem::getTransactionEvent().waitForFinished(); + auto beginT = std::chrono::high_resolution_clock::now(); + + using namespace pmacc; + constexpr uint fieldRhoSlot = 0; + DataConnector& dc = Environment<>::get().DataConnector(); + auto& fieldRho = *dc.get(FieldTmp::getUniqueId(fieldRhoSlot)); + + DataSpace numGuardCells = fieldRho.getGridLayout().guardSizeND(); + DataSpace coreBorderSize = fieldRho.getGridLayout().sizeWithoutGuardND(); + + using EligibleSpecies = pmacc::mp_filter; + /* calculate and add the charge density values from all species in FieldTmp */ + meta::ForEach< + EligibleSpecies, + detail::ComputeChargeDensity>, + boost::mpl::_1> + computeChargeDensity; + + fieldRho.getGridBuffer().getDeviceBuffer().setValue(FieldTmp::ValueType(0.0)); + computeChargeDensity(fieldRho, currentStep); + + /* add results of all species that are still in GUARD to next GPUs BORDER */ + EventTask fieldTmpEvent = fieldRho.asyncCommunication(eventSystem::getTransactionEvent()); + eventSystem::setTransactionEvent(fieldTmpEvent); + + auto boundaryConditionsDirichlet = poi::BoundaryConditionsDirichlet{}; + boundaryConditionsDirichlet(*fieldV.get(), m_mappingDesc.value()); + + auto rightHandSideNormalization = poi::RightHandSideNormalization{}; + rightHandSideNormalization(*fieldV.get(), fieldRho, m_mappingDesc.value()); + + + float_64 normRho; + { + auto rhoDeviceBox = fieldRho.getDeviceDataBox().shift(numGuardCells); + TransformDataBox fieldTransform( + [rhoDeviceBox] DEVICEONLY(DataSpace const& idx) -> float_64 + { return precisionCast(rhoDeviceBox[idx].x() * rhoDeviceBox[idx].x()); }); + + normRho = std::sqrt(reduceGlobal(coreBorderSize, fieldTransform)); + } + // recalculate rho + fieldRho.getGridBuffer().getDeviceBuffer().setValue(FieldTmp::ValueType(0.0)); + computeChargeDensity(fieldRho, currentStep); + /* add results of all species that are still in GUARD to next 
GPUs BORDER */ + eventSystem::setTransactionEvent(fieldRho.asyncCommunication(eventSystem::getTransactionEvent())); + + auto coreBorderMapper = makeAreaMapper(m_mappingDesc.value()); + + // normalize rho + forEachCell( + coreBorderMapper, + [normRho] DEVICEONLY(auto rhoValue) -> float_64 { return rhoValue.x() / normRho; }, + fieldRho.getGridBuffer().getDeviceBuffer()); + + auto vMapper = makeAreaMapper(m_mappingDesc.value()); + + // normalize v + forEachCell( + vMapper, + [normRho] DEVICEONLY(auto const vValue) -> float_64 { return vValue / normRho; }, + fieldV->fieldVBuffer->getDeviceBuffer()); + + poi::stencil( + coreBorderMapper, + poi::StencilFunc{}, + r0Buffer->getDeviceBuffer(), + fieldV->fieldVBuffer->getDeviceBuffer()); + + forEachCell( + coreBorderMapper, + [] DEVICEONLY(auto const r0Value, auto const rhoValue) -> float_64 + { return rhoValue.x() - r0Value; }, + r0Buffer->getDeviceBuffer(), + fieldRho.getGridBuffer().getDeviceBuffer()); + + pkBuffer->getDeviceBuffer().copyFrom(r0Buffer->getDeviceBuffer()); + rkBuffer->getDeviceBuffer().copyFrom(r0Buffer->getDeviceBuffer()); + + float_64 rho0; + /* rho reduction */ + { + auto r0Box = r0Buffer->getDeviceBuffer().getDataBox(); + auto r0BoxBorderGuard = r0Box.shift(numGuardCells); + + TransformDataBox fieldTransform( + [r0BoxBorderGuard] DEVICEONLY(DataSpace const& idx) -> float_64 + { return r0BoxBorderGuard[idx] * r0BoxBorderGuard[idx]; }); + + rho0 = reduceGlobal(coreBorderSize, fieldTransform); + } + + float_64 rho1 = rho0; + float_64 totalSum2; + + int maxIterations = m_maxSolverSteps; + + bool foundSolution = false; + int iteration = 0; + for(; iteration < maxIterations; ++iteration) + { + // preconditioner + if(m_disablePreconditioner) + mpkBuffer->getDeviceBuffer().copyFrom(pkBuffer->getDeviceBuffer()); + else + preconditioner(mpkBuffer, pkBuffer); + + mpkBuffer->communication(); + + // w = Ap + poi::stencil( + coreBorderMapper, + poi::StencilFunc{}, + ampkBuffer->getDeviceBuffer(), + mpkBuffer->getDeviceBuffer()); + + float_64 totalSum1; + /* local p = rw */ + { + auto r0Box = r0Buffer->getDeviceBuffer().getDataBox(); + auto r0BoxBorderGuard = r0Box.shift(numGuardCells); + + auto ampkBox = ampkBuffer->getDeviceBuffer().getDataBox(); + auto ampkBoxBorderGuard = ampkBox.shift(numGuardCells); + + TransformDataBox fieldTransform( + [r0BoxBorderGuard, ampkBoxBorderGuard] DEVICEONLY(DataSpace const& idx) -> float_64 + { return r0BoxBorderGuard[idx] * ampkBoxBorderGuard[idx]; }); + + totalSum1 = reduceGlobal(coreBorderSize, fieldTransform); + } + float_64 alpha = rho0 / totalSum1; + // r = r - alpha * w + forEachCell( + coreBorderMapper, + [alpha] DEVICEONLY(auto const rkValue, auto const ampkValue) -> float_64 + { return rkValue - alpha * ampkValue; }, + rkBuffer->getDeviceBuffer(), + ampkBuffer->getDeviceBuffer()); + + // preconditioner + if(m_disablePreconditioner) + zkBuffer->getDeviceBuffer().copyFrom(rkBuffer->getDeviceBuffer()); + else + preconditioner(zkBuffer, rkBuffer); + + zkBuffer->communication(); + + // t = A * r + poi::stencil( + coreBorderMapper, + poi::StencilFunc{}, + azkBuffer->getDeviceBuffer(), + zkBuffer->getDeviceBuffer()); + + /* totalSum1 = azk * rk */ + { + auto azkBox = azkBuffer->getDeviceBuffer().getDataBox(); + auto azkBoxBorderGuard = azkBox.shift(numGuardCells); + + auto rkBox = rkBuffer->getDeviceBuffer().getDataBox(); + auto rkBoxBorderGuard = rkBox.shift(numGuardCells); + + TransformDataBox fieldTransform( + [azkBoxBorderGuard, rkBoxBorderGuard] DEVICEONLY(DataSpace const& idx) -> float_64 + { 
return azkBoxBorderGuard[idx] * rkBoxBorderGuard[idx]; });

                    totalSum1 = reduceGlobal(coreBorderSize, fieldTransform);
                }

                /* totalSum2 = azk * azk */
                {
                    auto azkBox = azkBuffer->getDeviceBuffer().getDataBox();
                    auto azkBoxBorderGuard = azkBox.shift(numGuardCells);

                    TransformDataBox fieldTransform(
                        [azkBoxBorderGuard] DEVICEONLY(DataSpace<simDim> const& idx) -> float_64
                        { return azkBoxBorderGuard[idx] * azkBoxBorderGuard[idx]; });

                    totalSum2 = reduceGlobal(coreBorderSize, fieldTransform);
                }

                float_64 omega = totalSum1 / totalSum2;

                // v = v + alpha * mpk + omega * zk
                forEachCell(
                    coreBorderMapper,
                    [alpha,
                     omega] DEVICEONLY(auto const vValue, auto const mpkValue, auto const zkValue) -> float_64
                    { return vValue + alpha * mpkValue + omega * zkValue; },
                    fieldV->fieldVBuffer->getDeviceBuffer(),
                    mpkBuffer->getDeviceBuffer(),
                    zkBuffer->getDeviceBuffer());

                // rk = rk - omega * azk
                forEachCell(
                    coreBorderMapper,
                    [omega] DEVICEONLY(auto const rkValue, auto const azkValue) -> float_64
                    { return rkValue - omega * azkValue; },
                    rkBuffer->getDeviceBuffer(),
                    azkBuffer->getDeviceBuffer());

                /* totalSum1 = r0 * rk */
                {
                    auto r0Box = r0Buffer->getDeviceBuffer().getDataBox();
                    auto r0BoxBorderGuard = r0Box.shift(numGuardCells);

                    auto rkBox = rkBuffer->getDeviceBuffer().getDataBox();
                    auto rkBoxBorderGuard = rkBox.shift(numGuardCells);

                    TransformDataBox fieldTransform(
                        [r0BoxBorderGuard, rkBoxBorderGuard] DEVICEONLY(DataSpace<simDim> const& idx) -> float_64
                        { return r0BoxBorderGuard[idx] * rkBoxBorderGuard[idx]; });

                    totalSum1 = reduceGlobal(coreBorderSize, fieldTransform);
                }
                /* totalSum2 = rk * rk */
                {
                    auto rkBox = rkBuffer->getDeviceBuffer().getDataBox();
                    auto rkBoxBorderGuard = rkBox.shift(numGuardCells);

                    TransformDataBox fieldTransform(
                        [rkBoxBorderGuard] DEVICEONLY(DataSpace<simDim> const& idx) -> float_64
                        { return rkBoxBorderGuard[idx] * rkBoxBorderGuard[idx]; });

                    totalSum2 = reduceGlobal(coreBorderSize, fieldTransform);
                }

                rho1 = totalSum1;
                float_64 beta = rho1 / rho0 * alpha / omega;
                rho0 = rho1;
                if(std::sqrt(totalSum2) < m_solverEpsilon)
                {
                    foundSolution = true;
                    break;
                }
                // pk = rk + beta * (pk - omega * ampk)
                forEachCell(
                    coreBorderMapper,
                    [beta,
                     omega] DEVICEONLY(auto const pkValue, auto const rkValue, auto const ampkValue) -> float_64
                    { return rkValue + beta * (pkValue - omega * ampkValue); },
                    pkBuffer->getDeviceBuffer(),
                    rkBuffer->getDeviceBuffer(),
                    ampkBuffer->getDeviceBuffer());

            } // for loop

            // avoid deadlock due to blocking collective MPI operation
            eventSystem::getTransactionEvent().waitForFinished();

            bool ioRank = mpiReduce.hasResult(mpi::reduceMethods::Reduce());

            int maxGlobalIterations = 0;
            mpiReduce(
                pmacc::math::operation::Max(),
                &maxGlobalIterations,
                &iteration,
                1,
                mpi::reduceMethods::Reduce());

            float_64 maxGlobalNormRho = 0.0;
            mpiReduce(pmacc::math::operation::Max(), &maxGlobalNormRho, &normRho, 1, mpi::reduceMethods::Reduce());

            float_64 maxGlobalTotalSum2 = 0.0;
            mpiReduce(
                pmacc::math::operation::Max(),
                &maxGlobalTotalSum2,
                &totalSum2,
                1,
                mpi::reduceMethods::Reduce());

            if(foundSolution)
            {
                if(ioRank)
                    log(" - converged after %1%/%2% iterations with norm=%3%, total epsilon=%4%")
                        % maxGlobalIterations % maxIterations % maxGlobalNormRho % std::sqrt(maxGlobalTotalSum2);

                // normalize v back
                forEachCell(
                    coreBorderMapper,
                    [normRho] DEVICEONLY(auto const vValue) -> float_64
                    { return vValue * normRho / sim.pic.getEps0(); },
                    fieldV->fieldVBuffer->getDeviceBuffer());
                fieldV->fieldVBuffer->communication();

                // compute fieldE
                auto& eField = *dc.get<FieldE>(FieldE::getName());

                poi::stencil(
                    coreBorderMapper,
                    poi::GetFieldEStencil{},
                    eField.getGridBuffer().getDeviceBuffer(),
                    fieldV->fieldVBuffer->getDeviceBuffer());
                eventSystem::setTransactionEvent(eField.asyncCommunication(eventSystem::getTransactionEvent()));
            }

            eventSystem::getTransactionEvent().waitForFinished();
            auto endT = std::chrono::high_resolution_clock::now();
            double duration = std::chrono::duration<double>(endT - beginT).count();

            // avoid deadlock due to blocking collective MPI operation
            eventSystem::getTransactionEvent().waitForFinished();
            double globalMaxDuration = 0.0;
            mpiReduce(
                pmacc::math::operation::Max(),
                &globalMaxDuration,
                &duration,
                1,
                mpi::reduceMethods::Reduce());
            if(ioRank)
                log(" - duration %1% sec") % globalMaxDuration;

            if(ioRank && !foundSolution)
            {
                log(" - did not converge after %1% iterations with norm=%2%, total epsilon=%3%")
                    % maxIterations % normRho % std::sqrt(rho1);
                throw std::runtime_error("Poisson solver did not converge after max iterations");
            }
        }
        } // namespace stage
    } // namespace simulation
} // namespace picongpu
diff --git a/include/pmacc/boundary/Utility.hpp b/include/pmacc/boundary/Utility.hpp
index f919f43016..7a97664781 100644
--- a/include/pmacc/boundary/Utility.hpp
+++ b/include/pmacc/boundary/Utility.hpp
@@ -33,7 +33,7 @@ namespace pmacc
      *
      * @param exchangeType number characterizing exchange @see pmacc::type::ExchangeType
      */
-    HDINLINE bool isAxisAligned(uint32_t exchangeType)
+    constexpr bool isAxisAligned(uint32_t exchangeType)
     {
         return (FRONT % exchangeType == 0);
     }
diff --git a/include/pmacc/device/Reduce.hpp b/include/pmacc/device/Reduce.hpp
index af6a40785b..80c82f9d56 100644
--- a/include/pmacc/device/Reduce.hpp
+++ b/include/pmacc/device/Reduce.hpp
@@ -57,6 +57,13 @@ namespace pmacc
         {
         }
 
+        void tmpBufferAlloc()
+        {
+            // lazy allocation of the result buffer
+            if(!reduceBuffer)
+                reduceBuffer = std::make_unique<GridBuffer<Type, DIM1>>(DataSpace<DIM1>(byte));
+        }
+
         /** Reduce elements in global gpu memory
          *
          * @param func binary functor for reduce which takes two arguments, first argument is the source and
@@ -89,8 +96,7 @@
             threads = n;
 
         // lazy allocation of the result buffer
-        if(!reduceBuffer)
-            reduceBuffer = std::make_unique<GridBuffer<Type, DIM1>>(DataSpace<DIM1>(byte));
+        tmpBufferAlloc();
 
         auto* dest = (Type*) reduceBuffer->getDeviceBuffer().data();
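The pmacc/boundary/Utility.hpp hunk above makes the plane test usable in constant expressions; the same FRONT % exchangeType == 0 idiom is what the solver's exchange loops rely on. A standalone illustration, assuming pmacc's multiplicative exchange encoding (RIGHT=1, LEFT=2, BOTTOM=3, TOP=6, BACK=9, FRONT=18):

#include <cstdint>
#include <cstdio>

// Mirror of pmacc's plane test, made constexpr by this patch. Axis-aligned
// exchanges (planes) are exactly the divisors of FRONT; combined directions
// such as RIGHT+BOTTOM = 4 do not divide 18 and are rejected.
constexpr uint32_t FRONT = 18u;

constexpr bool isAxisAligned(uint32_t exchangeType)
{
    return (FRONT % exchangeType == 0);
}

static_assert(isAxisAligned(1u) && isAxisAligned(2u) && isAxisAligned(18u));
static_assert(!isAxisAligned(4u)); // RIGHT+BOTTOM corner

int main()
{
    for(uint32_t e = 1u; e <= FRONT; ++e)
        if(isAxisAligned(e))
            std::printf("exchange %u is a plane\n", e); // prints 1 2 3 6 9 18
    return 0;
}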
diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/fileOutput.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/fileOutput.param
new file mode 100644
index 0000000000..1b3793315b
--- /dev/null
+++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/fileOutput.param
@@ -0,0 +1,123 @@
+/* Copyright 2013-2024 Axel Huebl, Rene Widera, Felix Schmitt,
+ *                     Benjamin Worpitz, Richard Pausch, Pawel Ordyna
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include 
+
+/* some forward declarations we need */
+#include "picongpu/fields/Fields.def"
+#include "picongpu/particles/filter/filter.def"
+#include "picongpu/particles/particleToGrid/CombinedDerive.def"
+#include "picongpu/particles/particleToGrid/ComputeGridValuePerFrame.def"
+
+namespace picongpu
+{
+    /** FieldTmp output (calculated at runtime) *******************************
+     *
+     * Those operations derive scalar field quantities from particle species
+     * at runtime. Each value is mapped per cell. Some operations are identical
+     * up to a constant, so avoid writing those twice to save storage.
+     *
+     * you can choose any of these particle to grid projections:
+     * - Density: particle position + shape on the grid
+     * - BoundElectronDensity: density of bound electrons
+     *   note: only makes sense for partially ionized ions
+     * - ChargeDensity: density * charge
+     *   note: for species that do not change their charge state, this is
+     *         the same as the density times a constant for the charge
+     * - Energy: sum of kinetic particle energy per cell with respect to shape
+     * - EnergyDensity: average kinetic particle energy per cell times the
+     *   particle density
+     *   note: this is the same as the sum of kinetic particle energy
+     *         divided by a constant for the cell volume
+     * - Momentum: sum of chosen component of momentum per cell with respect to shape
+     * - MomentumDensity: average chosen component of momentum per cell times the particle density
+     *   note: this is the same as the sum of the chosen momentum component
+     *         divided by a constant for the cell volume
+     * - LarmorPower: radiated Larmor power
+     *   (species must contain the attribute `momentumPrev1`)
+     *
+     * for debugging:
+     * - MidCurrentDensityComponent:
+     *   density * charge * velocity_component
+     * - Counter: counts point like particles per cell
+     * - MacroCounter: counts point like macro particles per cell
+     *
+     * combined attributes:
+     * These attributes are defined as a function of two primary derived attributes.
+     *
+     * - AverageAttribute: template to compute a per particle average of any derived value
+     * - RelativisticDensity: equals to 1/gamma^2 * n. Where gamma is the Lorentz factor for the mean kinetic energy
+     *   in the cell and n is the usual number density. Useful for Faraday Rotation calculation.
+     *
+     * Example use:
+     * @code{.cpp}
+     * using AverageEnergy_Seq = deriveField::CreateEligible_t<
+     *     VectorAllSpecies,
+     *     deriveField::combinedAttributes::AverageAttribute<deriveField::derivedAttributes::Energy>>;
+     * using RelativisticDensity_Seq
+     *     = deriveField::CreateEligible_t<PIC_Electrons, deriveField::combinedAttributes::RelativisticDensity>;
+     * @endcode
+     *
+     * Filtering:
+     * You can create derived fields from filtered particles. Only particles passing
+     * the filter will contribute to the field quantity.
+     * For that you need to define your filters in `particleFilters.param` and pass a
+     * filter as the 3rd (optional) template argument in `CreateEligible_t`.
+     *
+     * Example: This will create a charge density field for all species that are
+     * eligible for this attribute and the chosen filter.
+ * @code{.cpp} + * using ChargeDensity_Seq + = deriveField::CreateEligible_t< VectorAllSpecies, + deriveField::derivedAttributes::ChargeDensity, filter::FilterOfYourChoice>; + * @endcode + */ + namespace deriveField = particles::particleToGrid; + + /* ChargeDensity section */ + using ChargeDensity_Seq + = deriveField::CreateEligible_t; + + /** FieldTmpSolvers groups all solvers that create data for FieldTmp ****** + * + * FieldTmpSolvers is used in @see FieldTmp to calculate the exchange size + */ + using FieldTmpSolvers = MakeSeq_t; + + + /** FileOutputFields: Groups all Fields that shall be dumped *************/ + + /** Possible native fields: FieldE, FieldB, FieldJ + */ + using NativeFileOutputFields = MakeSeq_t; + + using FileOutputFields = MakeSeq_t; + + + /** FileOutputParticles: Groups all Species that shall be dumped ********** + * + * hint: to disable particle output set to + * using FileOutputParticles = MakeSeq_t< >; + */ + using FileOutputParticles = MakeSeq_t<>; + +} // namespace picongpu diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param index 33e866d718..acbb710dfe 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param @@ -48,20 +48,20 @@ namespace picongpu * * Vector is automatically reduced to two dimensions for 2D (x,y) simulations. */ - struct QuietParam + struct RandomParameter { - /** Count of macro-particles per cell per direction at initial state + /** Maximum number of macro-particles per cell during density profile evaluation. * - * unit: none */ - using numParticlesPerDimension = mCT::shrinkTo< - mCT::Int, - simDim>::type; + * Determines the weighting of a macro particle as well as the number of + * macro-particles which sample the evolution of the particle distribution + * function in phase space. + * + * unit: none + */ + static constexpr uint32_t numParticlesPerCell = 25u; }; - /** Definition of Quiet start position functor that positions macro-particles regularly on the grid. - * No random number generator used. - */ - using Quiet = QuietImpl; + using Random = RandomImpl; } // namespace startPosition diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param index 7b6b819c38..12ccda19dc 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param @@ -39,8 +39,8 @@ namespace picongpu * the functors are called in order (from first to last functor) */ using InitPipeline = pmacc::mp_list< - CreateDensity, - Derive, + CreateDensity, + CreateDensity, Manipulate, Manipulate, Manipulate,