diff --git a/include/picongpu/fields/Fields.def b/include/picongpu/fields/Fields.def
index 2744b6aac7..777c6e94d8 100644
--- a/include/picongpu/fields/Fields.def
+++ b/include/picongpu/fields/Fields.def
@@ -68,4 +68,8 @@ namespace picongpu
     /** Current Density j, @see FieldJ.hpp */
     class FieldJ;
 
+    namespace fields::poissonSolver
+    {
+        struct FieldV;
+    } // namespace fields::poissonSolver
 } // namespace picongpu
diff --git a/include/picongpu/fields/Fields.hpp b/include/picongpu/fields/Fields.hpp
index 8ab7dd9305..dee16e1fe7 100644
--- a/include/picongpu/fields/Fields.hpp
+++ b/include/picongpu/fields/Fields.hpp
@@ -25,3 +25,4 @@
 #include "picongpu/fields/FieldJ.hpp"
 #include "picongpu/fields/FieldTmp.hpp"
 #include "picongpu/fields/Fields.def"
+#include "picongpu/fields/poissonSolver/FieldV.hpp"
diff --git a/include/picongpu/fields/poissonSolver/BICGStab.hpp b/include/picongpu/fields/poissonSolver/BICGStab.hpp
new file mode 100644
index 0000000000..95dcac28fa
--- /dev/null
+++ b/include/picongpu/fields/poissonSolver/BICGStab.hpp
@@ -0,0 +1,35 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+
+struct BICGStab
+{
+    // return residual
+    // return number of iterations
+    void operator()(FieldTmp& fieldV, FieldTmp& fieldRho, MappingDesc* cellDescription)
+    {
+        // set boundary conditions on fieldV (Dirichlet or Neumann)
+
+        // normalize the problem based on norm(fieldRho)
+    }
+};
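BICGStab.hpp is only an interface stub at this point; the working, preconditioned solver lives in Poisson.x.cpp further down. For orientation, here is a minimal, unpreconditioned BiCGStab sketch for a serial 1D Poisson problem showing the update sequence the production code follows with its pk/rk/r0 buffers. All names are generic placeholders; none of this is PIConGPU API.

// bicgstab_sketch.cpp - unpreconditioned BiCGStab on the 1D problem
// -d2V/dx2 = rho with homogeneous Dirichlet boundaries (h = 1).
#include <cmath>
#include <cstdio>
#include <vector>

using Vec = std::vector<double>;

// y = A x with A the 1D negative Laplacian (tridiagonal 2, -1)
static Vec applyA(Vec const& x)
{
    std::size_t const n = x.size();
    Vec y(n, 0.0);
    for(std::size_t i = 0; i < n; ++i)
    {
        double const left = (i > 0) ? x[i - 1] : 0.0; // Dirichlet: V = 0 outside
        double const right = (i + 1 < n) ? x[i + 1] : 0.0;
        y[i] = 2.0 * x[i] - left - right;
    }
    return y;
}

static double dot(Vec const& a, Vec const& b)
{
    double s = 0.0;
    for(std::size_t i = 0; i < a.size(); ++i)
        s += a[i] * b[i];
    return s;
}

int main()
{
    std::size_t const n = 64;
    Vec b(n, 1.0), x(n, 0.0); // rho = 1 everywhere, initial guess V = 0

    Vec r = b; // r0 = b - A*x0 with x0 = 0
    Vec r0 = r, p = r;
    double rho0 = dot(r0, r0);

    for(int iter = 0; iter < 200; ++iter)
    {
        Vec const v = applyA(p);
        double const alpha = rho0 / dot(r0, v);
        Vec s(n);
        for(std::size_t i = 0; i < n; ++i)
            s[i] = r[i] - alpha * v[i];
        Vec const t = applyA(s);
        double const omega = dot(t, s) / dot(t, t);
        for(std::size_t i = 0; i < n; ++i)
        {
            x[i] += alpha * p[i] + omega * s[i];
            r[i] = s[i] - omega * t[i];
        }
        if(std::sqrt(dot(r, r)) < 1.0e-10)
        {
            std::printf("converged after %d iterations\n", iter + 1);
            break;
        }
        double const rho1 = dot(r0, r);
        double const beta = (rho1 / rho0) * (alpha / omega);
        for(std::size_t i = 0; i < n; ++i)
            p[i] = r[i] + beta * (p[i] - omega * v[i]);
        rho0 = rho1;
    }
    return 0;
}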
diff --git a/include/picongpu/fields/poissonSolver/BoundaryConditions.hpp b/include/picongpu/fields/poissonSolver/BoundaryConditions.hpp
new file mode 100644
index 0000000000..663b58538a
--- /dev/null
+++ b/include/picongpu/fields/poissonSolver/BoundaryConditions.hpp
@@ -0,0 +1,118 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+#include "picongpu/fields/poissonSolver/FieldV.hpp"
+
+#include 
+#include 
+#include 
+
+namespace picongpu::fields::poissonSolver
+{
+    struct SolutionFunction
+    {
+        HDINLINE auto operator()(math::Vector<float_64, simDim> const& totalCellCoordinate) const
+        {
+            if constexpr(simDim == 3u)
+            {
+                return math::sin(totalCellCoordinate.x()) + math::cos(totalCellCoordinate.y())
+                    + 3.0 * math::sin(totalCellCoordinate.z())
+                    + totalCellCoordinate.x() * totalCellCoordinate.productOfComponents() + 10.0;
+            }
+            else if constexpr(simDim == 2u)
+            {
+                return math::sin(totalCellCoordinate.x()) + math::cos(totalCellCoordinate.y())
+                    + totalCellCoordinate.x() * totalCellCoordinate.productOfComponents() + 10.0;
+            }
+        }
+    };
+
+    struct ApplyDirichletBCsFromFunctionKernel
+    {
+        DINLINE auto operator()(
+            auto const& worker,
+            auto fieldVBox,
+            auto const boundaryFunction,
+            DataSpace<simDim> cellOffsetToTotalOrigin,
+            auto const mapper) const -> void
+        {
+            // including guards
+            DataSpace<simDim> const superCellIdx(mapper.getSuperCellIndex(worker.blockDomIdxND()));
+
+            DataSpace<simDim> numGuardCells = mapper.getGuardingSuperCells() * SuperCellSize::toRT();
+
+            // no guards included
+            DataSpace<simDim> superCellTotalCellOffset
+                = cellOffsetToTotalOrigin + superCellIdx * SuperCellSize::toRT() - numGuardCells;
+
+            constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;
+
+            auto forEachCellInSupercell = lockstep::makeForEach<cellsPerSuperCell>(worker);
+
+            forEachCellInSupercell(
+                [&](int32_t const linearCellIdx)
+                {
+                    /* cell index within the superCell */
+                    DataSpace<simDim> const cellIdx = pmacc::math::mapToND(SuperCellSize::toRT(), linearCellIdx);
+                    // without guards
+                    DataSpace<simDim> const totalCellIdx = superCellTotalCellOffset + cellIdx;
+
+                    auto totalDistance = precisionCast<float_64>(totalCellIdx)
+                        * precisionCast<float_64>(sim.pic.getCellSize().shrink<simDim>());
+
+                    fieldVBox(superCellIdx * SuperCellSize::toRT() + cellIdx) = boundaryFunction(totalDistance);
+                });
+        }
+    };
+
+    struct BoundaryConditionsDirichlet
+    {
+        //! apply the analytic boundary function as Dirichlet values on all non-communicating faces
+        void operator()(FieldV& fieldV, MappingDesc cellDescription) const
+        {
+            SubGrid<simDim> const& subGrid = Environment<simDim>::get().SubGrid();
+            auto globalDomain = subGrid.getGlobalDomain();
+            auto localDomain = subGrid.getLocalDomain();
+
+            auto cellOffsetToTotalOrigin = globalDomain.offset + localDomain.offset;
+
+            for(uint32_t i = 1; i < NumberOfExchanges<simDim>::value; ++i)
+            {
+                /* only call for planes: left right top bottom back front */
+                if(FRONT % i == 0 && !(Environment<simDim>::get().GridController().getCommunicationMask().isSet(i)))
+                {
+                    ExchangeMapping<GUARD, MappingDesc> mapper(cellDescription, i);
+
+                    PMACC_LOCKSTEP_KERNEL(ApplyDirichletBCsFromFunctionKernel{})
+                        .config(mapper.getGridDim(), SuperCellSize{})(
+                            fieldV.fieldVBuffer->getDeviceBuffer().getDataBox(),
+                            SolutionFunction{},
+                            cellOffsetToTotalOrigin,
+                            mapper);
+                }
+            }
+        }
+    };
+} // namespace picongpu::fields::poissonSolver
diff --git a/include/picongpu/fields/poissonSolver/ChargeDeposition.hpp b/include/picongpu/fields/poissonSolver/ChargeDeposition.hpp
new file mode 100644
index 0000000000..e6b0bda3e4
--- /dev/null
+++ b/include/picongpu/fields/poissonSolver/ChargeDeposition.hpp
@@ -0,0 +1,20 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once diff --git a/include/picongpu/fields/poissonSolver/FieldV.hpp b/include/picongpu/fields/poissonSolver/FieldV.hpp new file mode 100644 index 0000000000..73e93a6d32 --- /dev/null +++ b/include/picongpu/fields/poissonSolver/FieldV.hpp @@ -0,0 +1,121 @@ +/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/defines.hpp" +#include "picongpu/fields/YeeCell.hpp" +#include "picongpu/traits/FieldPosition.hpp" +#include "picongpu/traits/SIBaseUnits.hpp" + +#include + +namespace picongpu::fields::poissonSolver +{ + struct FieldV : pmacc::ISimulationData + { + using ValueType = float_64; + + std::shared_ptr> fieldVBuffer; + + //! Unit type of field components + using UnitValueType = float1_64; + + using DataBoxType = pmacc::DataBox>; + + //! Number of components of ValueType, for serialization + static constexpr int numComponents = 1u; + + FieldV(picongpu::MappingDesc const mappingDesc) + : fieldVBuffer{std::make_shared>(mappingDesc.getGridLayout())} + { + } + + void synchronize() override + { + fieldVBuffer->getHostBuffer().copyFrom(fieldVBuffer->getDeviceBuffer()); + }; + + //! Get the host data box for the field values + DataBoxType getHostDataBox() + { + return fieldVBuffer->getHostBuffer().getDataBox(); + } + + //! Get the device data box for the field values + DataBoxType getDeviceDataBox() + { + return fieldVBuffer->getDeviceBuffer().getDataBox(); + } + + GridLayout getGridLayout() + { + return fieldVBuffer->getGridLayout(); + } + + /** + * Return the globally unique identifier for this simulation data. 
+ * + * @return globally unique identifier + */ + SimulationDataId getUniqueId() override + { + return "FieldV"; + }; + + static std::string getName() + { + return "FieldV"; + } + + static UnitValueType getUnit() + { + return UnitValueType{sim.unit.eField() * sim.unit.length()}; + } + + static std::vector getUnitDimension() + { + /* V is in volts: V = kg * m^2 / (A * s^3) + * -> L^2 * M * T^-3 * I^-1 + */ + std::vector unitDimension(7, 0.0); + unitDimension.at(SIBaseUnits::length) = 2.0; + unitDimension.at(SIBaseUnits::mass) = 1.0; + unitDimension.at(SIBaseUnits::time) = -3.0; + unitDimension.at(SIBaseUnits::electricCurrent) = -1.0; + return unitDimension; + } + }; +} // namespace picongpu::fields::poissonSolver + +namespace picongpu::traits +{ + template<> + struct FieldPosition + { + using VectorVectorDD = ::pmacc::math::Vector const; + + HDINLINE FieldPosition() = default; + + HDINLINE VectorVectorDD operator()() const + { + return VectorVectorDD::create(floatD_X::create(0.0)); + } + }; +} // namespace picongpu::traits diff --git a/include/picongpu/fields/poissonSolver/RightHandSideNormalization.hpp b/include/picongpu/fields/poissonSolver/RightHandSideNormalization.hpp new file mode 100644 index 0000000000..a5fe7d6840 --- /dev/null +++ b/include/picongpu/fields/poissonSolver/RightHandSideNormalization.hpp @@ -0,0 +1,95 @@ +/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+#include "picongpu/fields/poissonSolver/FieldV.hpp"
+
+#include 
+#include 
+#include 
+#include 
+
+namespace picongpu::fields::poissonSolver
+{
+    struct RightHandSideNormalizationKernel
+    {
+        DINLINE auto operator()(auto const& worker, auto const fieldVBox, auto fieldRhoBox, auto const mapper) const
+            -> void
+        {
+            DataSpace<simDim> const relExchangeDir = Mask::getRelativeDirections<simDim>(mapper.getExchangeType());
+            DataSpace<simDim> const superCellIdx(mapper.getSuperCellIndex(worker.blockDomIdxND()));
+            DataSpace<simDim> superCellCellOffset = superCellIdx * SuperCellSize::toRT();
+
+            DataSpace<simDim> numGuardCells = mapper.getGuardingSuperCells() * SuperCellSize::toRT();
+            DataSpace<simDim> adjustedSuperCellCellOffset = superCellCellOffset - (relExchangeDir * numGuardCells);
+
+            DataSpace<simDim> numCellsLocalDomain = mapper.getGridSuperCells() * SuperCellSize::toRT();
+
+            constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;
+
+            auto forEachCellInSupercell = lockstep::makeForEach<cellsPerSuperCell>(worker);
+
+            forEachCellInSupercell(
+                [&](int32_t const linearCellIdx)
+                {
+                    /* cell index within the superCell */
+                    DataSpace<simDim> const cellIdx = pmacc::math::mapToND(SuperCellSize::toRT(), linearCellIdx);
+
+                    DataSpace<simDim> const localCellIdx = superCellCellOffset + cellIdx;
+                    DataSpace<simDim> const dataCellIdx = adjustedSuperCellCellOffset + cellIdx;
+
+                    for(uint32_t d = 0; d < simDim; ++d)
+                    {
+                        if(relExchangeDir[d] != 0
+                           && (localCellIdx[d] == 0u || localCellIdx[d] == numCellsLocalDomain[d] - 1))
+                        {
+                            fieldRhoBox(dataCellIdx) += fieldVBox(dataCellIdx + relExchangeDir)
+                                / (sim.pic.getCellSize()[d] * sim.pic.getCellSize()[d]);
+                        }
+                    }
+                });
+        }
+    };
+
+    struct RightHandSideNormalization
+    {
+        //! fold the Dirichlet boundary values stored in fieldV into the right-hand side fieldRho
+        void operator()(FieldV& fieldV, FieldTmp& fieldRho, MappingDesc cellDescription)
+        {
+            /* only call for planes: left right top bottom back front */
+            for(uint32_t i = 1; i < NumberOfExchanges<simDim>::value; ++i)
+            {
+                if(FRONT % i == 0 && !(Environment<simDim>::get().GridController().getCommunicationMask().isSet(i)))
+                {
+                    ExchangeMapping<GUARD, MappingDesc> mapper(cellDescription, i);
+
+                    PMACC_LOCKSTEP_KERNEL(RightHandSideNormalizationKernel{})
+                        .config(mapper.getGridDim(), SuperCellSize{})(
+                            fieldV.fieldVBuffer->getDeviceBuffer().getDataBox(),
+                            fieldRho.getDeviceDataBox(),
+                            mapper);
+                }
+            }
+        }
+    };
+} // namespace picongpu::fields::poissonSolver
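RightHandSideNormalizationKernel applies the standard trick of folding known Dirichlet values into the right-hand side: at the first interior cell, the stencil term that would reference the fixed boundary potential is moved over to b. A 1D sketch with generic names (not PIConGPU API):

#include <cstdio>

// 1D discretization: (-V[i-1] + 2 V[i] - V[i+1]) / dx^2 = b[i]
// At the first interior cell (i = 0) the neighbour V[-1] = Vb is a known
// Dirichlet value, so it moves to the right-hand side:
//   (2 V[0] - V[1]) / dx^2 = b[0] + Vb / dx^2
// which mirrors the fieldRhoBox(...) += fieldVBox(...) / dx_d^2 update above.
double foldDirichletIntoRhs(double b0, double boundaryValue, double dx)
{
    return b0 + boundaryValue / (dx * dx);
}

int main()
{
    std::printf("adjusted b0 = %g\n", foldDirichletIntoRhs(1.0, 10.0, 0.5));
    return 0;
}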
diff --git a/include/picongpu/fields/poissonSolver/Stencil.hpp b/include/picongpu/fields/poissonSolver/Stencil.hpp
new file mode 100644
index 0000000000..4b98714241
--- /dev/null
+++ b/include/picongpu/fields/poissonSolver/Stencil.hpp
@@ -0,0 +1,147 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+#include "picongpu/fields/poissonSolver/FieldV.hpp"
+
+#include 
+#include 
+#include 
+
+namespace picongpu::fields::poissonSolver
+{
+    struct StencilFunc
+    {
+        HDINLINE auto operator()(auto fieldIn) const
+        {
+            constexpr auto cellSize = sim.pic.getCellSize().shrink<simDim>();
+            constexpr auto cellSizeSquared = cellSize * cellSize;
+            float_64 const fc0 = 2.0 * (1.0 / (cellSizeSquared)).sumOfComponents();
+            if constexpr(simDim == 3u)
+            {
+                constexpr DataSpace<3u> xDir(1, 0, 0);
+                constexpr DataSpace<3u> yDir(0, 1, 0);
+                constexpr DataSpace<3u> zDir(0, 0, 1);
+
+                return *fieldIn * fc0 - (fieldIn(-xDir) + fieldIn(xDir)) / cellSizeSquared.x()
+                    - (fieldIn(-yDir) + fieldIn(yDir)) / cellSizeSquared.y()
+                    - (fieldIn(-zDir) + fieldIn(zDir)) / cellSizeSquared.z();
+            }
+            else if constexpr(simDim == 2u)
+            {
+                constexpr DataSpace<2u> xDir(1, 0);
+                constexpr DataSpace<2u> yDir(0, 1);
+
+                return *fieldIn * fc0 - (fieldIn(-xDir) + fieldIn(xDir)) / cellSizeSquared.x()
+                    - (fieldIn(-yDir) + fieldIn(yDir)) / cellSizeSquared.y();
+            }
+        }
+    };
+
+    struct GetFieldEStencil
+    {
+        HDINLINE auto operator()(auto fieldIn) const
+        {
+            constexpr auto cellSize = sim.pic.getCellSize().shrink<simDim>();
+
+            float3_X eValue;
+            if constexpr(simDim == 3u)
+            {
+                constexpr DataSpace<3u> xDir(1, 0, 0);
+                constexpr DataSpace<3u> yDir(0, 1, 0);
+                constexpr DataSpace<3u> zDir(0, 0, 1);
+
+                eValue.x() = (*fieldIn - fieldIn(xDir)) / cellSize.x();
+                eValue.y() = (*fieldIn - fieldIn(yDir)) / cellSize.y();
+                eValue.z() = (*fieldIn - fieldIn(zDir)) / cellSize.z();
+
+                return eValue;
+            }
+            else if constexpr(simDim == 2u)
+            {
+                constexpr DataSpace<2u> xDir(1, 0);
+                constexpr DataSpace<2u> yDir(0, 1);
+
+                eValue.x() = (*fieldIn - fieldIn(xDir)) / cellSize.x();
+                eValue.y() = (*fieldIn - fieldIn(yDir)) / cellSize.y();
+                eValue.z() = 0.0_X;
+
+                return eValue;
+            }
+        }
+    };
+
+    struct Stencil
+    {
+        DINLINE auto operator()(
+            auto const& worker,
+            auto const mapper,
+            auto const stencilFunctor,
+            auto fieldOut,
+            auto fieldIn) const -> void
+        {
+            DataSpace<simDim> const superCellIdx(mapper.getSuperCellIndex(worker.blockDomIdxND()));
+            DataSpace<simDim> superCellCellOffset = superCellIdx * SuperCellSize::toRT();
+
+            using Type = typename decltype(fieldIn)::ValueType;
+            using BlockArea = pmacc::SuperCellDescription<
+                SuperCellSize,
+                typename pmacc::math::CT::make_Int<simDim, 1>::type,
+                typename pmacc::math::CT::make_Int<simDim, 1>::type>;
+
+            constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume<SuperCellSize>::type::value;
+
+            // use a cached buffer because each cell is read multiple times; this moves the blockArea
+            // into shared memory
+            auto cache = pmacc::CachedBox::create<0, Type>(worker, BlockArea());
+            auto buffShifted = fieldIn.shift(superCellCellOffset);
+
+            // the thread collective is a convenience wrapper around the lockstep for-each;
+            // it handles the guard offset by subtracting the origin offset
+            auto collective = pmacc::makeThreadCollective<BlockArea>();
+
+            pmacc::math::operation::Assign assign;
+            collective(worker, assign, cache, buffShifted);
+
+            worker.sync();
+
+            auto forEachCellInSupercell = lockstep::makeForEach<cellsPerSuperCell>(worker);
+
+            forEachCellInSupercell(
+                [&](int32_t const linearCellIdx)
+                {
+                    /* cell index within the superCell */
+                    DataSpace<simDim> const cellIdx = pmacc::math::mapToND(SuperCellSize::toRT(), linearCellIdx);
+                    fieldOut[superCellCellOffset + cellIdx] = stencilFunctor(cache.shift(cellIdx));
+                });
+        }
+    };
+
+    inline void stencil(auto mapper, auto const& functor, auto& outBuffer, auto& inBuffer)
+    {
+        // poisson stencil
+        PMACC_LOCKSTEP_KERNEL(fields::poissonSolver::Stencil{})
+            .config(
+                mapper.getGridDim(),
+                SuperCellSize{})(mapper, functor, outBuffer.getDataBox(), inBuffer.getDataBox());
+    }
+} // namespace picongpu::fields::poissonSolver
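Note the sign convention: StencilFunc applies the negative Laplacian, A·V = -∇²V, so the operator is positive definite and ρ/ε₀ can be used directly as the right-hand side. A quick serial check of the 2D five-point form, using plain doubles and a hypothetical helper (not PIConGPU API):

#include <cassert>

// 2D five-point negative Laplacian at one cell, mirroring StencilFunc:
// fc0 * V[c] - (V[x-1] + V[x+1]) / dx^2 - (V[y-1] + V[y+1]) / dy^2
double applyFivePoint(double c, double xm, double xp, double ym, double yp, double dx, double dy)
{
    double const fc0 = 2.0 * (1.0 / (dx * dx) + 1.0 / (dy * dy));
    return c * fc0 - (xm + xp) / (dx * dx) - (ym + yp) / (dy * dy);
}

int main()
{
    // For V(x, y) = x^2 at x = 0 (dx = dy = 1): neighbours along x are 1, 1,
    // along y constant 0. Expect -d2V/dx2 = -2, i.e. the negative Laplacian.
    assert(applyFivePoint(0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0) == -2.0);
    return 0;
}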
diff --git a/include/picongpu/main.x.cpp b/include/picongpu/main.x.cpp
index e4ff97b354..f31973e3df 100644
--- a/include/picongpu/main.x.cpp
+++ b/include/picongpu/main.x.cpp
@@ -44,25 +44,30 @@ int runSimulation(int argc, char** argv)
 {
     using namespace picongpu;
 
-    auto sim = ::picongpu::
-        SimulationStarter<::picongpu::InitialiserController, ::picongpu::PluginController, ::picongpu::Simulation>{};
-    auto const parserStatus = sim.parseConfigs(argc, argv);
     int errorCode = EXIT_FAILURE;
-
-    switch(parserStatus)
     {
-    case ArgsParser::Status::error:
-        errorCode = EXIT_FAILURE;
-        break;
-    case ArgsParser::Status::success:
-        sim.load();
-        sim.start();
-        sim.unload();
-        [[fallthrough]];
-    case ArgsParser::Status::successExit:
-        errorCode = 0;
-        break;
-    };
+        auto sim = ::picongpu::SimulationStarter<
+            ::picongpu::InitialiserController,
+            ::picongpu::PluginController,
+            ::picongpu::Simulation>{};
+        auto const parserStatus = sim.parseConfigs(argc, argv);
+
+
+        switch(parserStatus)
+        {
+        case ArgsParser::Status::error:
+            errorCode = EXIT_FAILURE;
+            break;
+        case ArgsParser::Status::success:
+            sim.load();
+            sim.start();
+            sim.unload();
+            [[fallthrough]];
+        case ArgsParser::Status::successExit:
+            errorCode = 0;
+            break;
+        };
+    }
 
     // finalize the pmacc context */
     pmacc::Environment<>::get().finalize();
diff --git a/include/picongpu/simulation/control/Simulation.hpp b/include/picongpu/simulation/control/Simulation.hpp
index 451c07d3b8..8af9173341 100644
--- a/include/picongpu/simulation/control/Simulation.hpp
+++ b/include/picongpu/simulation/control/Simulation.hpp
@@ -53,6 +53,7 @@
 #include "picongpu/simulation/stage/ParticleInit.hpp"
 #include "picongpu/simulation/stage/ParticleIonization.hpp"
 #include "picongpu/simulation/stage/ParticlePush.hpp"
+#include "picongpu/simulation/stage/Poisson.hpp"
 #include "picongpu/simulation/stage/RuntimeDensityFile.hpp"
 #include "picongpu/simulation/stage/SynchrotronRadiation.hpp"
 #include "picongpu/versionFormat.hpp"
@@ -153,6 +154,9 @@ namespace picongpu
             fieldBackground.registerHelp(desc);
             particleBoundaries.registerHelp(desc);
             runtimeDensityFile.registerHelp(desc);
+
+            poissonSolver = std::make_shared<simulation::stage::Poisson>();
+            poissonSolver->registerHelp(desc);
         }
 
         virtual void startSimulation() override
@@ -348,6 +352,9 @@ namespace picongpu
             // initialize runtime density file paths
             runtimeDensityFile.init();
 
+            // create memory for poisson solver
+            poissonSolver->init(*cellDescription);
+
             // create factory for the random number generator
             uint32_t const userSeed = random::seed::ISeed{}();
             uint32_t const seed = std::hash<std::string>{}(std::to_string(userSeed));
@@ -490,6 +497,7 @@ namespace picongpu
                 initialiserController->init();
                 simulation::stage::ParticleInit{}(step);
                 (*atomicPhysics).fixAtomicStateInit(*cellDescription);
+                (*poissonSolver)(step);
                 // Check Debye resolution
                 particles::debyeLength::check(*cellDescription);
             }
@@ -632,6 +640,8 @@ namespace picongpu
         // Because of it, has a special init() method that has to be called during initialization of the simulation
         simulation::stage::RuntimeDensityFile runtimeDensityFile;
 
+        std::shared_ptr<simulation::stage::Poisson> poissonSolver;
+
         IInitPlugin* initialiserController{nullptr};
 
         std::unique_ptr<MappingDesc> cellDescription;
diff --git a/include/picongpu/simulation/stage/Poisson.hpp b/include/picongpu/simulation/stage/Poisson.hpp
new file mode 100644
index 0000000000..4b5d9d98dd
--- /dev/null
+++ b/include/picongpu/simulation/stage/Poisson.hpp
@@ -0,0 +1,104 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldJ.hpp"
+#include "picongpu/fields/FieldJ.kernel"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+#include "picongpu/fields/currentDeposition/Deposit.hpp"
+#include "picongpu/fields/poissonSolver/FieldV.hpp"
+#include "picongpu/particles/filter/filter.hpp"
+#include "picongpu/particles/param.hpp"
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include 
+
+namespace picongpu::simulation::stage
+{
+    //! Functor for the stage of the PIC loop computing the initial electrostatic field via a Poisson solve
+    struct Poisson
+    {
+        Poisson();
+
+        void init(picongpu::MappingDesc const mappingDesc);
+
+        void registerHelp(po::options_description& desc);
+
+        /** Deposit the charge density of all eligible species, solve Poisson's
+         * equation for the electrostatic potential and derive the initial
+         * electric field from it
+         *
+         * @param currentStep index of time iteration
+         */
+        void operator()(uint32_t const currentStep);
+
+        void preconditioner(
+            std::unique_ptr<pmacc::GridBuffer<float_64, simDim>>& xBuffer,
+            std::unique_ptr<pmacc::GridBuffer<float_64, simDim>>& bBuffer);
+
+    private:
+        void participate(bool status)
+        {
+            mpiReduce.participate(status);
+        }
+
+        template<typename T_ReduceFunc>
+        auto reduceGlobal(DataSpace<simDim> fieldSize, auto dataBoxIn, T_ReduceFunc reduceFunctor = T_ReduceFunc{});
+
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> pkBuffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> rkBuffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> r0Buffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> mpkBuffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> ampkBuffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> zkBuffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> azkBuffer;
+
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> yBuffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> wBuffer;
+        std::unique_ptr<pmacc::GridBuffer<float_64, simDim>> zBuffer;
+
+        std::shared_ptr<fields::poissonSolver::FieldV> fieldV;
+
+        std::optional<MappingDesc> m_mappingDesc;
+
+        mpi::MPIReduce mpiReduce;
+        std::unique_ptr<pmacc::device::Reduce> localReduce;
+
+        // defaults will be overwritten by command line arguments
+        bool m_useSolver = false;
+        uint32_t m_maxSolverSteps = 20;
+        float_64 m_solverEpsilon = 1.0e-8;
+
+        bool m_disablePreconditioner = false;
+        uint32_t m_maxPreconditionerSteps = 20;
+    };
+} // namespace picongpu::simulation::stage
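The preconditioner() declared above implements a Chebyshev iteration, parametrized by bounds on the spectrum of the discrete negative Laplacian with Dirichlet boundaries: per axis the eigenvalues are λ_k = (4/h²)·sin²(kπ/(2(N+1))), k = 1…N, and the multi-dimensional bounds are the per-axis sums, matching the eigenMin/eigenMax loops in Poisson.x.cpp below. A standalone sketch with a generic helper (not PIConGPU API):

#include <cmath>
#include <cstdio>

// Extreme eigenvalues of the 1D negative Laplacian (Dirichlet boundaries,
// N interior cells, spacing h): lambda_k = 4/h^2 * sin^2(k*pi / (2*(N+1))).
void chebyshevBounds(int n, double h, double& eigenMin, double& eigenMax)
{
    double const piHalf = 2.0 * std::atan(1.0); // pi/2
    eigenMin = 4.0 * std::pow(std::sin(1.0 * piHalf / (n + 1)), 2) / (h * h);
    eigenMax = 4.0 * std::pow(std::sin(n * piHalf / (n + 1)), 2) / (h * h);
}

int main()
{
    double lo, hi;
    chebyshevBounds(256, 1.0, lo, hi);
    // theta and delta parametrize the Chebyshev polynomials on [lo, hi],
    // exactly as computed at the start of Poisson::preconditioner
    double const theta = 0.5 * (hi + lo), delta = 0.5 * (hi - lo);
    std::printf("lambda in [%g, %g], theta=%g delta=%g\n", lo, hi, theta, delta);
    return 0;
}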
diff --git a/include/picongpu/simulation/stage/Poisson.x.cpp b/include/picongpu/simulation/stage/Poisson.x.cpp
new file mode 100644
index 0000000000..bc92e90e98
--- /dev/null
+++ b/include/picongpu/simulation/stage/Poisson.x.cpp
@@ -0,0 +1,750 @@
+/* Copyright 2025 Tapish Narwal, Luca Pennati, Rene Widera
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "picongpu/simulation/stage/Poisson.hpp"
+
+#include "picongpu/defines.hpp"
+#include "picongpu/fields/FieldJ.hpp"
+#include "picongpu/fields/FieldJ.kernel"
+#include "picongpu/fields/FieldTmpOperations.hpp"
+#include "picongpu/fields/currentDeposition/Deposit.hpp"
+#include "picongpu/fields/poissonSolver/BoundaryConditions.hpp"
+#include "picongpu/fields/poissonSolver/RightHandSideNormalization.hpp"
+#include "picongpu/fields/poissonSolver/Stencil.hpp"
+#include "picongpu/particles/filter/filter.hpp"
+#include "picongpu/particles/param.hpp"
+#include "picongpu/particles/particleToGrid/CombinedDerive.hpp"
+#include "picongpu/particles/particleToGrid/ComputeGridValuePerFrame.hpp"
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+#include 
+
+namespace picongpu::simulation::stage
+{
+    template<typename T_Func>
+    struct DeviceLambda
+    {
+        T_Func const func;
+
+        template<typename... T>
+        DEVICEONLY auto operator()(T&&... args) const
+        {
+            return func(std::forward<T>(args)...);
+        }
+
+        template<typename... T>
+        DEVICEONLY auto operator()(T&&... args)
+        {
+            return func(std::forward<T>(args)...);
+        }
+    };
+
+    template<typename T_Func>
+    DeviceLambda(T_Func const) -> DeviceLambda<T_Func>;
+} // namespace picongpu::simulation::stage
+
+namespace alpaka
+{
+    template<typename T_Func>
+    struct IsKernelArgumentTriviallyCopyable<picongpu::simulation::stage::DeviceLambda<T_Func>, void>
+        : std::true_type
+    {
+    };
+} // namespace alpaka
+
+namespace picongpu
+{
+    namespace simulation
+    {
+        namespace stage
+        {
+            namespace detail
+            {
+                template<typename T_SpeciesType, typename T_Area>
+                struct ComputeChargeDensity
+                {
+                    using SpeciesType = pmacc::particles::meta::FindByNameOrType_t<VectorAllSpecies, T_SpeciesType>;
+                    static uint32_t const area = T_Area::value;
+
+                    HINLINE void operator()(FieldTmp& fieldTmp, uint32_t const currentStep) const
+                    {
+                        DataConnector& dc = Environment<>::get().DataConnector();
+
+                        /* load species without copying the particle data to the host */
+                        auto speciesTmp = dc.get<SpeciesType>(SpeciesType::FrameType::getName());
+
+                        /* run algorithm */
+                        using ChargeDensitySolver = typename particles::particleToGrid::CreateFieldTmpOperation_t<
+                            SpeciesType,
+                            particles::particleToGrid::derivedAttributes::ChargeDensity>::Solver;
+
+                        computeFieldTmpValue<area, ChargeDensitySolver>(fieldTmp, *speciesTmp, currentStep);
+                    }
+                };
+
+            } // namespace detail
+
+            namespace deriveField = particles::particleToGrid;
+            template<typename T_Species>
+            using SpeciesEligibleForChargeConservation = typename particles::traits::
+                SpeciesEligibleForSolver<T_Species, deriveField::derivedAttributes::ChargeDensity>::type;
+
+            Poisson::Poisson() : localReduce{std::make_unique<pmacc::device::Reduce>(1024)}
+            {
+            }
+
+            void Poisson::registerHelp(po::options_description& desc)
+            {
+                namespace po = boost::program_options;
+                po::options_description solverDesc("Poisson solver");
+
+                solverDesc.add_options()(
+                    "poisson.activate",
+                    po::value(&m_useSolver)->zero_tokens(),
+                    "enable poisson solver");
+                solverDesc.add_options()(
+                    "poisson.maxSteps",
+                    po::value(&m_maxSolverSteps)->default_value(2000),
+                    "maximum number of iterations of the solver");
+                solverDesc.add_options()(
+                    "poisson.epsilon",
+                    po::value(&m_solverEpsilon)->default_value(1.0e-8),
+                    "maximum allowed error of the poisson solver");
+                // preconditioner
+                solverDesc.add_options()(
+                    "poisson.preconditioner.disable",
po::value(&m_disablePreconditioner)->zero_tokens(), + "disable poisson solver preconditioner"); + solverDesc.add_options()( + "poisson.preconditioner.maxSteps", + po::value(&m_maxPreconditionerSteps)->default_value(20), + "maximum number of steps for the preconditioner"); + desc.add(solverDesc); + } + + void Poisson::init(MappingDesc const mappingDesc) + { + m_mappingDesc = std::make_optional(mappingDesc); + + if(m_useSolver) + { + pkBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + rkBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + r0Buffer = std::make_unique>(m_mappingDesc->getGridLayout()); + mpkBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + ampkBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + zkBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + azkBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + fieldV = std::make_shared(m_mappingDesc.value()); + + yBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + wBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + zBuffer = std::make_unique>(m_mappingDesc->getGridLayout()); + + auto const commTag0 = pmacc::traits::getUniqueId(); + auto const commTag1 = pmacc::traits::getUniqueId(); + auto const commTagPk = pmacc::traits::getUniqueId(); + auto const commTagRk = pmacc::traits::getUniqueId(); + auto const commTagFieldV = pmacc::traits::getUniqueId(); + + auto const commTagY = pmacc::traits::getUniqueId(); + /*go over all directions*/ + for(uint32_t i = 1; i < NumberOfExchanges::value; ++i) + { + // set communication only for planes + if(FRONT % i == 0) + { + DataSpace relativeMask = Mask::getRelativeDirections(i); + /* guarding cells depend on direction + * for negative direction use originGuard else endGuard (relative direction ZERO is + * ignored) don't switch end and origin because this is a read buffer and no send buffer + */ + auto guardingCells = DataSpace::create(0); + for(uint32_t d = 0; d < simDim; ++d) + guardingCells[d] = (relativeMask[d] == 0 ? 
0 : 1); + mpkBuffer->addExchange(GUARD, i, guardingCells, commTag0); + zkBuffer->addExchange(GUARD, i, guardingCells, commTag1); + pkBuffer->addExchange(GUARD, i, guardingCells, commTagPk); + rkBuffer->addExchange(GUARD, i, guardingCells, commTagRk); + fieldV->fieldVBuffer->addExchange(GUARD, i, guardingCells, commTagFieldV); + + yBuffer->addExchange(GUARD, i, guardingCells, commTagY); + } + } + DataConnector& dc = Environment<>::get().DataConnector(); + dc.share(fieldV); + + participate(true); + } + } + + template + class TransformDataBox : private T_TranformFunctor + { + public: + using ValueType = decltype(std::declval()(DataSpace::create(0))); + + static constexpr std::uint32_t Dim = simDim; + + HDINLINE TransformDataBox() = default; + + HDINLINE TransformDataBox(T_TranformFunctor transformFunc) : T_TranformFunctor(transformFunc) + { + } + + HDINLINE TransformDataBox(TransformDataBox const&) = default; + + HDINLINE ValueType operator()(DataSpace const& idx) const + { + return T_TranformFunctor::operator()(idx + m_offset); + } + + HDINLINE ValueType operator[](DataSpace const idx) const + { + return T_TranformFunctor::operator()(idx + m_offset); + } + + HDINLINE TransformDataBox shift(DataSpace const& offset) const + { + TransformDataBox result(*this); + result.m_offset += offset; + return result; + } + + DataSpace m_offset = DataSpace::create(0); + }; + + template + inline auto Poisson::reduceGlobal( + DataSpace fieldSize, + auto dataBoxIn, + T_TranformFunctor reduceFunctor) + { + DataBoxDim1Access d1Access(dataBoxIn, fieldSize); + + float_64 resultLocal = (*localReduce)(reduceFunctor, d1Access, fieldSize.productOfComponents()); + + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + eventSystem::getTransactionEvent().waitForFinished(); + float_64 resultGlobal; + mpiReduce(reduceFunctor, &resultGlobal, &resultLocal, 1, mpi::reduceMethods::AllReduce()); + + return resultGlobal; + } + + struct ForEachKernel + { + DINLINE auto operator()( + auto const& worker, + auto const& mapper, + auto const& func, + auto outBox, + auto... boxes) const -> void + { + DataSpace const superCellIdx(mapper.getSuperCellIndex(worker.blockDomIdxND())); + DataSpace superCellCellOffset = superCellIdx * SuperCellSize::toRT(); + + constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume::type::value; + + auto forEachCellInSupercell = lockstep::makeForEach(worker); + + forEachCellInSupercell( + [&](int32_t const linearCellIdx) + { + DataSpace const cellIdx + = pmacc::math::mapToND(SuperCellSize::toRT(), linearCellIdx); + DataSpace const dataCellOffset = superCellCellOffset + cellIdx; + outBox[dataCellOffset] = func(outBox[dataCellOffset], boxes[dataCellOffset]...); + }); + } + }; + + inline void forEachCell(auto mapper, auto const& functor, auto& outBuffer, auto&... 
buffers) + { + PMACC_LOCKSTEP_KERNEL(ForEachKernel{}) + .config(mapper.getGridDim(), SuperCellSize{})( + mapper, + DeviceLambda{functor}, + outBuffer.getDataBox(), + buffers.getDataBox()...); + } + + void Poisson::preconditioner( + std::unique_ptr>& xBuffer, + std::unique_ptr>& bBuffer) + { + yBuffer->getDeviceBuffer().setValue(0.0); + wBuffer->getDeviceBuffer().setValue(0.0); + zBuffer->getDeviceBuffer().setValue(0.0); + + SubGrid const& subGrid = Environment::get().SubGrid(); + auto globalDomain = subGrid.getGlobalDomain().size; + auto cellSizeSquared = sim.pic.getCellSize() * sim.pic.getCellSize(); + + float_64 eigenMin = 0.0; + float_64 eigenMax = 0.0; + + for(uint32_t d = 0; d < simDim; ++d) + { + eigenMin += 4.0 * math::sin(1.0 * pmacc::math::Pi::halfValue / (globalDomain[d] + 1)) + * math::sin(1.0 * pmacc::math::Pi::halfValue / (globalDomain[d] + 1)) + / (cellSizeSquared[d]); + + eigenMax + += 4.0 + * math::sin(globalDomain[d] * pmacc::math::Pi::halfValue / (globalDomain[d] + 1)) + * math::sin(globalDomain[d] * pmacc::math::Pi::halfValue / (globalDomain[d] + 1)) + / (cellSizeSquared[d]); + } + + float_64 const theta = 0.5 * (eigenMax + eigenMin); + float_64 const delta = 0.5 * (eigenMax - eigenMin); + float_64 const sigma = theta / delta; + + float_64 rhoOld = 1. / sigma; + float_64 rhoCurrent = 1. / (2. * sigma - rhoOld); + + bBuffer->communication(); + + auto coreBorderMapper = makeAreaMapper(m_mappingDesc.value()); + + namespace poi = fields::poissonSolver; + + forEachCell( + coreBorderMapper, + [theta] DEVICEONLY(auto, auto const bValue) -> float_64 { return bValue / theta; }, + zBuffer->getDeviceBuffer(), + bBuffer->getDeviceBuffer()); + + poi::stencil( + coreBorderMapper, + poi::StencilFunc{}, + yBuffer->getDeviceBuffer(), + bBuffer->getDeviceBuffer()); + + forEachCell( + coreBorderMapper, + [rhoCurrent, theta, delta] DEVICEONLY(auto const yValue) -> float_64 + { return -2.0 * rhoCurrent * yValue / theta / delta; }, + yBuffer->getDeviceBuffer()); + + forEachCell( + coreBorderMapper, + [rhoCurrent, delta] DEVICEONLY(auto const yValue, auto const bValue) -> float_64 + { return yValue + 4.0 * bValue * rhoCurrent / delta; }, + yBuffer->getDeviceBuffer(), + bBuffer->getDeviceBuffer()); + + + uint32_t iterMax = m_maxPreconditionerSteps; + for(uint32_t i = 2; i < iterMax; ++i) + { + rhoOld = rhoCurrent; + rhoCurrent = 1. / (2. 
* sigma - rhoOld); + yBuffer->communication(); + + poi::stencil( + coreBorderMapper, + poi::StencilFunc{}, + wBuffer->getDeviceBuffer(), + yBuffer->getDeviceBuffer()); + + forEachCell( + coreBorderMapper, + [rhoCurrent, delta] DEVICEONLY(auto const wValue) -> float_64 + { return -2.0 * rhoCurrent * wValue / delta; }, + wBuffer->getDeviceBuffer()); + + forEachCell( + coreBorderMapper, + [rhoCurrent, delta] DEVICEONLY(auto const wValue, auto const bValue) -> float_64 + { return wValue + 2.0 * rhoCurrent * bValue / delta; }, + wBuffer->getDeviceBuffer(), + bBuffer->getDeviceBuffer()); + + forEachCell( + coreBorderMapper, + [rhoCurrent, sigma] DEVICEONLY(auto const wValue, auto const yValue) -> float_64 + { return wValue + 2.0 * rhoCurrent * sigma * yValue; }, + wBuffer->getDeviceBuffer(), + yBuffer->getDeviceBuffer()); + + forEachCell( + coreBorderMapper, + [rhoCurrent, rhoOld] DEVICEONLY(auto const wValue, auto const zValue) -> float_64 + { return wValue - rhoCurrent * rhoOld * zValue; }, + wBuffer->getDeviceBuffer(), + zBuffer->getDeviceBuffer()); + + zBuffer->getDeviceBuffer().copyFrom(yBuffer->getDeviceBuffer()); + yBuffer->getDeviceBuffer().copyFrom(wBuffer->getDeviceBuffer()); + } // loop + xBuffer->getDeviceBuffer().copyFrom(wBuffer->getDeviceBuffer()); + } + + void Poisson::operator()(uint32_t const currentStep) + { + namespace poi = fields::poissonSolver; + + // only one rank is writing status initial information of the poisson solver + bool mainRank = Environment::get().GridController().getGlobalRank() == 0; + if(mainRank) + log("Poisson solver:"); + if(!m_useSolver) + { + if(mainRank) + log(" - disabled"); + return; + } + eventSystem::getTransactionEvent().waitForFinished(); + auto beginT = std::chrono::high_resolution_clock::now(); + + using namespace pmacc; + constexpr uint fieldRhoSlot = 0; + DataConnector& dc = Environment<>::get().DataConnector(); + auto& fieldRho = *dc.get(FieldTmp::getUniqueId(fieldRhoSlot)); + + DataSpace numGuardCells = fieldRho.getGridLayout().guardSizeND(); + DataSpace coreBorderSize = fieldRho.getGridLayout().sizeWithoutGuardND(); + + using EligibleSpecies = pmacc::mp_filter; + /* calculate and add the charge density values from all species in FieldTmp */ + meta::ForEach< + EligibleSpecies, + detail::ComputeChargeDensity>, + boost::mpl::_1> + computeChargeDensity; + + fieldRho.getGridBuffer().getDeviceBuffer().setValue(FieldTmp::ValueType(0.0)); + computeChargeDensity(fieldRho, currentStep); + + /* add results of all species that are still in GUARD to next GPUs BORDER */ + EventTask fieldTmpEvent = fieldRho.asyncCommunication(eventSystem::getTransactionEvent()); + eventSystem::setTransactionEvent(fieldTmpEvent); + + auto boundaryConditionsDirichlet = poi::BoundaryConditionsDirichlet{}; + boundaryConditionsDirichlet(*fieldV.get(), m_mappingDesc.value()); + + auto rightHandSideNormalization = poi::RightHandSideNormalization{}; + rightHandSideNormalization(*fieldV.get(), fieldRho, m_mappingDesc.value()); + + + float_64 normRho; + { + auto rhoDeviceBox = fieldRho.getDeviceDataBox().shift(numGuardCells); + TransformDataBox fieldTransform( + [rhoDeviceBox] DEVICEONLY(DataSpace const& idx) -> float_64 + { return precisionCast(rhoDeviceBox[idx].x() * rhoDeviceBox[idx].x()); }); + + normRho = std::sqrt(reduceGlobal(coreBorderSize, fieldTransform)); + } + // recalculate rho + fieldRho.getGridBuffer().getDeviceBuffer().setValue(FieldTmp::ValueType(0.0)); + computeChargeDensity(fieldRho, currentStep); + /* add results of all species that are still in GUARD to next 
GPUs BORDER */ + eventSystem::setTransactionEvent(fieldRho.asyncCommunication(eventSystem::getTransactionEvent())); + + auto coreBorderMapper = makeAreaMapper(m_mappingDesc.value()); + + // normalize rho + forEachCell( + coreBorderMapper, + [normRho] DEVICEONLY(auto rhoValue) -> float_64 { return rhoValue.x() / normRho; }, + fieldRho.getGridBuffer().getDeviceBuffer()); + + auto vMapper = makeAreaMapper(m_mappingDesc.value()); + + // normalize v + forEachCell( + vMapper, + [normRho] DEVICEONLY(auto const vValue) -> float_64 { return vValue / normRho; }, + fieldV->fieldVBuffer->getDeviceBuffer()); + + poi::stencil( + coreBorderMapper, + poi::StencilFunc{}, + r0Buffer->getDeviceBuffer(), + fieldV->fieldVBuffer->getDeviceBuffer()); + + forEachCell( + coreBorderMapper, + [] DEVICEONLY(auto const r0Value, auto const rhoValue) -> float_64 + { return rhoValue.x() - r0Value; }, + r0Buffer->getDeviceBuffer(), + fieldRho.getGridBuffer().getDeviceBuffer()); + + pkBuffer->getDeviceBuffer().copyFrom(r0Buffer->getDeviceBuffer()); + rkBuffer->getDeviceBuffer().copyFrom(r0Buffer->getDeviceBuffer()); + + float_64 rho0; + /* rho reduction */ + { + auto r0Box = r0Buffer->getDeviceBuffer().getDataBox(); + auto r0BoxBorderGuard = r0Box.shift(numGuardCells); + + TransformDataBox fieldTransform( + [r0BoxBorderGuard] DEVICEONLY(DataSpace const& idx) -> float_64 + { return r0BoxBorderGuard[idx] * r0BoxBorderGuard[idx]; }); + + rho0 = reduceGlobal(coreBorderSize, fieldTransform); + } + + float_64 rho1 = rho0; + float_64 totalSum2; + + int maxIterations = m_maxSolverSteps; + + bool foundSolution = false; + int iteration = 0; + for(; iteration < maxIterations; ++iteration) + { + // preconditioner + if(m_disablePreconditioner) + mpkBuffer->getDeviceBuffer().copyFrom(pkBuffer->getDeviceBuffer()); + else + preconditioner(mpkBuffer, pkBuffer); + + mpkBuffer->communication(); + + // w = Ap + poi::stencil( + coreBorderMapper, + poi::StencilFunc{}, + ampkBuffer->getDeviceBuffer(), + mpkBuffer->getDeviceBuffer()); + + float_64 totalSum1; + /* local p = rw */ + { + auto r0Box = r0Buffer->getDeviceBuffer().getDataBox(); + auto r0BoxBorderGuard = r0Box.shift(numGuardCells); + + auto ampkBox = ampkBuffer->getDeviceBuffer().getDataBox(); + auto ampkBoxBorderGuard = ampkBox.shift(numGuardCells); + + TransformDataBox fieldTransform( + [r0BoxBorderGuard, ampkBoxBorderGuard] DEVICEONLY(DataSpace const& idx) -> float_64 + { return r0BoxBorderGuard[idx] * ampkBoxBorderGuard[idx]; }); + + totalSum1 = reduceGlobal(coreBorderSize, fieldTransform); + } + float_64 alpha = rho0 / totalSum1; + // r = r - alpha * w + forEachCell( + coreBorderMapper, + [alpha] DEVICEONLY(auto const rkValue, auto const ampkValue) -> float_64 + { return rkValue - alpha * ampkValue; }, + rkBuffer->getDeviceBuffer(), + ampkBuffer->getDeviceBuffer()); + + // preconditioner + if(m_disablePreconditioner) + zkBuffer->getDeviceBuffer().copyFrom(rkBuffer->getDeviceBuffer()); + else + preconditioner(zkBuffer, rkBuffer); + + zkBuffer->communication(); + + // t = A * r + poi::stencil( + coreBorderMapper, + poi::StencilFunc{}, + azkBuffer->getDeviceBuffer(), + zkBuffer->getDeviceBuffer()); + + /* totalSum1 = azk * rk */ + { + auto azkBox = azkBuffer->getDeviceBuffer().getDataBox(); + auto azkBoxBorderGuard = azkBox.shift(numGuardCells); + + auto rkBox = rkBuffer->getDeviceBuffer().getDataBox(); + auto rkBoxBorderGuard = rkBox.shift(numGuardCells); + + TransformDataBox fieldTransform( + [azkBoxBorderGuard, rkBoxBorderGuard] DEVICEONLY(DataSpace const& idx) -> float_64 + { 
return azkBoxBorderGuard[idx] * rkBoxBorderGuard[idx]; });

                    totalSum1 = reduceGlobal(coreBorderSize, fieldTransform);
                }

                /* totalSum2 = azk * azk */
                {
                    auto azkBox = azkBuffer->getDeviceBuffer().getDataBox();
                    auto azkBoxBorderGuard = azkBox.shift(numGuardCells);

                    TransformDataBox fieldTransform(
                        [azkBoxBorderGuard] DEVICEONLY(DataSpace<simDim> const& idx) -> float_64
                        { return azkBoxBorderGuard[idx] * azkBoxBorderGuard[idx]; });

                    totalSum2 = reduceGlobal(coreBorderSize, fieldTransform);
                }

                float_64 omega = totalSum1 / totalSum2;

                // v = v + alpha * mpk + omega * zk
                forEachCell(
                    coreBorderMapper,
                    [alpha,
                     omega] DEVICEONLY(auto const vValue, auto const mpkValue, auto const zkValue) -> float_64
                    { return vValue + alpha * mpkValue + omega * zkValue; },
                    fieldV->fieldVBuffer->getDeviceBuffer(),
                    mpkBuffer->getDeviceBuffer(),
                    zkBuffer->getDeviceBuffer());

                // rk = rk - omega * azk
                forEachCell(
                    coreBorderMapper,
                    [omega] DEVICEONLY(auto const rkValue, auto const azkValue) -> float_64
                    { return rkValue - omega * azkValue; },
                    rkBuffer->getDeviceBuffer(),
                    azkBuffer->getDeviceBuffer());

                /* totalSum1 = r0 * rk */
                {
                    auto r0Box = r0Buffer->getDeviceBuffer().getDataBox();
                    auto r0BoxBorderGuard = r0Box.shift(numGuardCells);

                    auto rkBox = rkBuffer->getDeviceBuffer().getDataBox();
                    auto rkBoxBorderGuard = rkBox.shift(numGuardCells);

                    TransformDataBox fieldTransform(
                        [r0BoxBorderGuard, rkBoxBorderGuard] DEVICEONLY(DataSpace<simDim> const& idx) -> float_64
                        { return r0BoxBorderGuard[idx] * rkBoxBorderGuard[idx]; });

                    totalSum1 = reduceGlobal(coreBorderSize, fieldTransform);
                }
                /* totalSum2 = rk * rk */
                {
                    auto rkBox = rkBuffer->getDeviceBuffer().getDataBox();
                    auto rkBoxBorderGuard = rkBox.shift(numGuardCells);

                    TransformDataBox fieldTransform(
                        [rkBoxBorderGuard] DEVICEONLY(DataSpace<simDim> const& idx) -> float_64
                        { return rkBoxBorderGuard[idx] * rkBoxBorderGuard[idx]; });

                    totalSum2 = reduceGlobal(coreBorderSize, fieldTransform);
                }

                rho1 = totalSum1;
                float_64 beta = rho1 / rho0 * alpha / omega;
                rho0 = rho1;
                if(std::sqrt(totalSum2) < m_solverEpsilon)
                {
                    foundSolution = true;
                    break;
                }
                // pk = rk + beta * (pk - omega * ampk)
                forEachCell(
                    coreBorderMapper,
                    [beta,
                     omega] DEVICEONLY(auto const pkValue, auto const rkValue, auto const ampkValue) -> float_64
                    { return rkValue + beta * (pkValue - omega * ampkValue); },
                    pkBuffer->getDeviceBuffer(),
                    rkBuffer->getDeviceBuffer(),
                    ampkBuffer->getDeviceBuffer());

            } // for loop

            // avoid deadlock due to blocking collective MPI operation
            eventSystem::getTransactionEvent().waitForFinished();

            bool ioRank = mpiReduce.hasResult(mpi::reduceMethods::Reduce());

            int maxGlobalIterations = 0;
            mpiReduce(
                pmacc::math::operation::Max(),
                &maxGlobalIterations,
                &iteration,
                1,
                mpi::reduceMethods::Reduce());

            float_64 maxGlobalNormRho = 0.0;
            mpiReduce(pmacc::math::operation::Max(), &maxGlobalNormRho, &normRho, 1, mpi::reduceMethods::Reduce());

            float_64 maxGlobalTotalSum2 = 0.0;
            mpiReduce(
                pmacc::math::operation::Max(),
                &maxGlobalTotalSum2,
                &totalSum2,
                1,
                mpi::reduceMethods::Reduce());

            if(foundSolution)
            {
                if(ioRank)
                    log(" - converged after %1%/%2% iterations with norm=%3%, total epsilon=%4%")
                        % maxGlobalIterations % maxIterations % maxGlobalNormRho % std::sqrt(maxGlobalTotalSum2);

                // normalize v back
                forEachCell(
                    coreBorderMapper,
                    [normRho] DEVICEONLY(auto const vValue) -> float_64
                    { return vValue * normRho / sim.pic.getEps0(); },
                    fieldV->fieldVBuffer->getDeviceBuffer());
                fieldV->fieldVBuffer->communication();

                // compute fieldE
                auto& eField = *dc.get<FieldE>(FieldE::getName());

                poi::stencil(
                    coreBorderMapper,
                    poi::GetFieldEStencil{},
                    eField.getGridBuffer().getDeviceBuffer(),
                    fieldV->fieldVBuffer->getDeviceBuffer());
                eventSystem::setTransactionEvent(eField.asyncCommunication(eventSystem::getTransactionEvent()));
            }

            eventSystem::getTransactionEvent().waitForFinished();
            auto endT = std::chrono::high_resolution_clock::now();
            double duration = std::chrono::duration<double>(endT - beginT).count();

            // avoid deadlock due to blocking collective MPI operation
            eventSystem::getTransactionEvent().waitForFinished();
            double globalMaxDuration = 0.0;
            mpiReduce(
                pmacc::math::operation::Max(),
                &globalMaxDuration,
                &duration,
                1,
                mpi::reduceMethods::Reduce());
            if(ioRank)
                log(" - duration %1% sec") % globalMaxDuration;

            if(ioRank && !foundSolution)
            {
                log(" - did not converge after %1% iterations with norm=%2%, total epsilon=%3%")
                    % maxIterations % normRho % std::sqrt(rho1);
                throw std::runtime_error("Poisson solver did not converge after max iterations");
            }
        }
        } // namespace stage
    } // namespace simulation
} // namespace picongpu
diff --git a/include/pmacc/boundary/Utility.hpp b/include/pmacc/boundary/Utility.hpp
index f919f43016..7a97664781 100644
--- a/include/pmacc/boundary/Utility.hpp
+++ b/include/pmacc/boundary/Utility.hpp
@@ -33,7 +33,7 @@ namespace pmacc
      *
      * @param exchangeType number characterizing exchange @see pmacc::type::ExchangeType
      */
-    HDINLINE bool isAxisAligned(uint32_t exchangeType)
+    constexpr bool isAxisAligned(uint32_t exchangeType)
     {
         return (FRONT % exchangeType == 0);
     }
diff --git a/include/pmacc/device/Reduce.hpp b/include/pmacc/device/Reduce.hpp
index af6a40785b..80c82f9d56 100644
--- a/include/pmacc/device/Reduce.hpp
+++ b/include/pmacc/device/Reduce.hpp
@@ -57,6 +57,13 @@ namespace pmacc
         {
         }
 
+        void tmpBufferAlloc()
+        {
+            // lazy allocation of the result buffer
+            if(!reduceBuffer)
+                reduceBuffer = std::make_unique<GridBuffer<Type, DIM1>>(DataSpace<DIM1>(byte));
+        }
+
         /** Reduce elements in global gpu memory
          *
          * @param func binary functor for reduce which takes two arguments, first argument is the source and
@@ -89,8 +96,7 @@
             threads = n;
 
         // lazy allocation of the result buffer
-        if(!reduceBuffer)
-            reduceBuffer = std::make_unique<GridBuffer<Type, DIM1>>(DataSpace<DIM1>(byte));
+        tmpBufferAlloc();
 
         auto* dest = (Type*) reduceBuffer->getDeviceBuffer().data();
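The pmacc/boundary/Utility.hpp hunk above makes the plane test usable in constant expressions; the same FRONT % exchangeType == 0 idiom is what the solver's exchange loops rely on. A standalone illustration, assuming pmacc's multiplicative exchange encoding (RIGHT=1, LEFT=2, BOTTOM=3, TOP=6, BACK=9, FRONT=18):

#include <cstdint>
#include <cstdio>

// Mirror of pmacc's plane test, made constexpr by this patch. Axis-aligned
// exchanges (planes) are exactly the divisors of FRONT; combined directions
// such as RIGHT+BOTTOM = 4 do not divide 18 and are rejected.
constexpr uint32_t FRONT = 18u;

constexpr bool isAxisAligned(uint32_t exchangeType)
{
    return (FRONT % exchangeType == 0);
}

static_assert(isAxisAligned(1u) && isAxisAligned(2u) && isAxisAligned(18u));
static_assert(!isAxisAligned(4u)); // RIGHT+BOTTOM corner

int main()
{
    for(uint32_t e = 1u; e <= FRONT; ++e)
        if(isAxisAligned(e))
            std::printf("exchange %u is a plane\n", e); // prints 1 2 3 6 9 18
    return 0;
}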
diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/fileOutput.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/fileOutput.param
new file mode 100644
index 0000000000..1b3793315b
--- /dev/null
+++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/fileOutput.param
@@ -0,0 +1,123 @@
+/* Copyright 2013-2024 Axel Huebl, Rene Widera, Felix Schmitt,
+ *                     Benjamin Worpitz, Richard Pausch, Pawel Ordyna
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include 
+
+/* some forward declarations we need */
+#include "picongpu/fields/Fields.def"
+#include "picongpu/particles/filter/filter.def"
+#include "picongpu/particles/particleToGrid/CombinedDerive.def"
+#include "picongpu/particles/particleToGrid/ComputeGridValuePerFrame.def"
+
+namespace picongpu
+{
+    /** FieldTmp output (calculated at runtime) *******************************
+     *
+     * Those operations derive scalar field quantities from particle species
+     * at runtime. Each value is mapped per cell. Some operations are identical
+     * up to a constant, so avoid writing those twice to save storage.
+     *
+     * you can choose any of these particle to grid projections:
+     * - Density: particle position + shape on the grid
+     * - BoundElectronDensity: density of bound electrons
+     *   note: only makes sense for partially ionized ions
+     * - ChargeDensity: density * charge
+     *   note: for species that do not change their charge state, this is
+     *         the same as the density times a constant for the charge
+     * - Energy: sum of kinetic particle energy per cell with respect to shape
+     * - EnergyDensity: average kinetic particle energy per cell times the
+     *   particle density
+     *   note: this is the same as the sum of kinetic particle energy
+     *         divided by a constant for the cell volume
+     * - Momentum: sum of chosen component of momentum per cell with respect to shape
+     * - MomentumDensity: average chosen component of momentum per cell times the particle density
+     *   note: this is the same as the sum of the chosen momentum component
+     *         divided by a constant for the cell volume
+     * - LarmorPower: radiated Larmor power
+     *   (species must contain the attribute `momentumPrev1`)
+     *
+     * for debugging:
+     * - MidCurrentDensityComponent:
+     *   density * charge * velocity_component
+     * - Counter: counts point like particles per cell
+     * - MacroCounter: counts point like macro particles per cell
+     *
+     * combined attributes:
+     * These attributes are defined as a function of two primary derived attributes.
+     *
+     * - AverageAttribute: template to compute a per particle average of any derived value
+     * - RelativisticDensity: equals to 1/gamma^2 * n. Where gamma is the Lorentz factor for the mean kinetic energy
+     *   in the cell and n is the usual number density. Useful for Faraday Rotation calculation.
+     *
+     * Example use:
+     * @code{.cpp}
+     * using AverageEnergy_Seq = deriveField::CreateEligible_t<
+     *     VectorAllSpecies,
+     *     deriveField::combinedAttributes::AverageAttribute<deriveField::derivedAttributes::Energy>>;
+     * using RelativisticDensity_Seq
+     *     = deriveField::CreateEligible_t<PIC_Electrons, deriveField::combinedAttributes::RelativisticDensity>;
+     * @endcode
+     *
+     * Filtering:
+     * You can create derived fields from filtered particles. Only particles passing
+     * the filter will contribute to the field quantity.
+     * For that you need to define your filters in `particleFilters.param` and pass a
+     * filter as the 3rd (optional) template argument in `CreateEligible_t`.
+     *
+     * Example: This will create a charge density field for all species that are
+     * eligible for this attribute and the chosen filter.
+ * @code{.cpp} + * using ChargeDensity_Seq + = deriveField::CreateEligible_t< VectorAllSpecies, + deriveField::derivedAttributes::ChargeDensity, filter::FilterOfYourChoice>; + * @endcode + */ + namespace deriveField = particles::particleToGrid; + + /* ChargeDensity section */ + using ChargeDensity_Seq + = deriveField::CreateEligible_t; + + /** FieldTmpSolvers groups all solvers that create data for FieldTmp ****** + * + * FieldTmpSolvers is used in @see FieldTmp to calculate the exchange size + */ + using FieldTmpSolvers = MakeSeq_t; + + + /** FileOutputFields: Groups all Fields that shall be dumped *************/ + + /** Possible native fields: FieldE, FieldB, FieldJ + */ + using NativeFileOutputFields = MakeSeq_t; + + using FileOutputFields = MakeSeq_t; + + + /** FileOutputParticles: Groups all Species that shall be dumped ********** + * + * hint: to disable particle output set to + * using FileOutputParticles = MakeSeq_t< >; + */ + using FileOutputParticles = MakeSeq_t<>; + +} // namespace picongpu diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param index 33e866d718..acbb710dfe 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param @@ -48,20 +48,20 @@ namespace picongpu * * Vector is automatically reduced to two dimensions for 2D (x,y) simulations. */ - struct QuietParam + struct RandomParameter { - /** Count of macro-particles per cell per direction at initial state + /** Maximum number of macro-particles per cell during density profile evaluation. * - * unit: none */ - using numParticlesPerDimension = mCT::shrinkTo< - mCT::Int, - simDim>::type; + * Determines the weighting of a macro particle as well as the number of + * macro-particles which sample the evolution of the particle distribution + * function in phase space. + * + * unit: none + */ + static constexpr uint32_t numParticlesPerCell = 25u; }; - /** Definition of Quiet start position functor that positions macro-particles regularly on the grid. - * No random number generator used. - */ - using Quiet = QuietImpl; + using Random = RandomImpl; } // namespace startPosition diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param index 7b6b819c38..12ccda19dc 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param @@ -39,8 +39,8 @@ namespace picongpu * the functors are called in order (from first to last functor) */ using InitPipeline = pmacc::mp_list< - CreateDensity, - Derive, + CreateDensity, + CreateDensity, Manipulate, Manipulate, Manipulate,