diff --git a/include/picongpu/plugins/openPMD/NDScalars.hpp b/include/picongpu/plugins/openPMD/NDScalars.hpp index 7ef07f6235..d027417ed1 100644 --- a/include/picongpu/plugins/openPMD/NDScalars.hpp +++ b/include/picongpu/plugins/openPMD/NDScalars.hpp @@ -174,11 +174,17 @@ namespace picongpu DataSpace gridPos = Environment::get().GridController().getPosition(); ::openPMD::Offset start; ::openPMD::Extent count; + ::openPMD::Extent extent = mrc.getExtent(); start.reserve(ndim); count.reserve(ndim); for(int d = 0; d < ndim; ++d) { - start.push_back(gridPos.revert()[d]); + /* + * When restarting with more parallel processes than the checkpoint had originally been written + * with, we must take care not to index past the dataset boundaries. Just loop around to the start + * in that case. Not the finest way, but it does the job for now.. + */ + start.push_back(gridPos.revert()[d] % extent[d]); count.push_back(1); } diff --git a/include/picongpu/plugins/openPMD/openPMDWriter.x.cpp b/include/picongpu/plugins/openPMD/openPMDWriter.x.cpp index b835de9627..5f5921f4e3 100644 --- a/include/picongpu/plugins/openPMD/openPMDWriter.x.cpp +++ b/include/picongpu/plugins/openPMD/openPMDWriter.x.cpp @@ -1024,6 +1024,12 @@ make sure that environment variable OPENPMD_BP_BACKEND is not set to ADIOS1. { loadRngStatesImpl(&mThreadParams, restartStep); } + catch(std::exception const& e) + { + log("openPMD: loading RNG states failed, they will be re-initialized " + "instead. Original error:\n\t%1%") + % e.what(); + } catch(...) { log( diff --git a/include/picongpu/plugins/openPMD/restart/LoadSpecies.hpp b/include/picongpu/plugins/openPMD/restart/LoadSpecies.hpp index d7487bbda4..d1fb263f0f 100644 --- a/include/picongpu/plugins/openPMD/restart/LoadSpecies.hpp +++ b/include/picongpu/plugins/openPMD/restart/LoadSpecies.hpp @@ -35,8 +35,10 @@ # include +# include # include # include +# include # include @@ -46,6 +48,84 @@ namespace picongpu { using namespace pmacc; + template + struct RedistributeFilteredParticles + { + template + HINLINE void operator()( + FrameType& frame, + FilterType const& filter, + RemapType const& remap, + uint64_t const numParticlesCurrentBatch, + char const filterRemove) + { + using Identifier = T_Identifier; + using ValueType = typename pmacc::traits::Resolve::type::type; + using ComponentType = typename GetComponentsType::type; + + ValueType* dataPtr = frame.getIdentifier(Identifier()).getPointer(); + + for(size_t particleIndex = 0; particleIndex < numParticlesCurrentBatch; ++particleIndex) + { + if(filter[particleIndex] == filterRemove) + { + continue; + } + dataPtr[remap[particleIndex]] = dataPtr[particleIndex]; + } + } + }; + + struct KernelFilterParticles + { + template + DINLINE void operator()( + T_Worker const& worker, + FrameType&& loadedData, + MemIdxType size, + DataSpace patchTotalOffset, + DataSpace patchUpperCorner, + char const filterKeep, + char const filterRemove, + FilterType&& filterOut) const + { + // DataSpace<1> particleIndex = worker.blockDomIdxND(); + constexpr uint32_t blockDomSize = T_Worker::blockDomSize(); + auto numDataBlocks = (size + blockDomSize - 1u) / blockDomSize; + auto position_ = loadedData.getIdentifier(position()).getPointer(); + auto positionOffset = loadedData.getIdentifier(totalCellIdx()).getPointer(); + + // grid-strided loop over the chunked data + for(uint32_t dataBlock = worker.blockDomIdx(); dataBlock < numDataBlocks; + dataBlock += worker.gridDomSize()) + { + auto dataBlockOffset = dataBlock * blockDomSize; + auto forEach = 
pmacc::lockstep::makeForEach(worker); + forEach( + [&](uint32_t const inBlockIdx) + { + auto idx = dataBlockOffset + inBlockIdx; + if(idx < size) + { + auto& positionVec = position_[idx]; + auto& positionOffsetVec = positionOffset[idx]; + char filterCurrent = filterKeep; + for(size_t d = 0; d < simDim; ++d) + { + auto positionInD = positionVec[d] + positionOffsetVec[d]; + if(positionInD < patchTotalOffset[d] || positionInD >= patchUpperCorner[d]) + { + filterCurrent = filterRemove; + break; + } + } + filterOut[idx] = filterCurrent; + } + }); + } + } + }; + /** Load species from openPMD checkpoint storage * * @tparam T_Species type of species @@ -70,6 +150,227 @@ namespace picongpu using NewParticleDescription = typename ReplaceValueTypeSeq::type; + struct LoadParams + { + using FrameType = Frame; + using BufferType = Frame; + + std::string const& speciesName; + ::openPMD::ParticleSpecies const& particleSpecies; + std::shared_ptr const& speciesTmp; + uint64_t* patchNumParticles; + uint64_t* patchNumParticlesOffset; + DataSpace const& cellOffsetToTotalDomain; + uint32_t restartChunkSize; + + auto numParticlesAndChunkSize(std::deque const& matches) const -> std::pair + { + uint64_t totalNumParticles = std::transform_reduce( + matches.begin(), + matches.end(), + 0, + /* reduce = */ [](uint64_t left, uint64_t right) { return left + right; }, + /* transform = */ [this](size_t patchIdx) { return patchNumParticles[patchIdx]; }); + uint64_t maxChunkSize = std::min(static_cast(restartChunkSize), totalNumParticles); + return std::make_pair(totalNumParticles, maxChunkSize); + } + + template + void loadMatchesGeneric( + std::deque const& matches, + ThreadParams* threadParams, + uint64_t totalNumParticles, + uint64_t maxChunkSize, + Functor&& forEachPatch) const + { + BufferType buffers; + FrameType mappedFrame; + log("openPMD: malloc mapped memory: %1%") % speciesName; + /*malloc mapped memory*/ + meta::ForEach> + mallocMem; + mallocMem(buffers, mappedFrame, maxChunkSize); + + for(size_t const patchIdx : matches) + { + uint64_t particleOffset = patchNumParticlesOffset[patchIdx]; + uint64_t numParticles = patchNumParticles[patchIdx]; + + log("openPMD: Loading %1% particles from offset %2%") + % (long long unsigned) totalNumParticles % (long long unsigned) particleOffset; + + + uint32_t const numLoadIterations + = maxChunkSize == 0u ? 
0u : alpaka::core::divCeil(numParticles, maxChunkSize); + + for(uint64_t loadRound = 0u; loadRound < numLoadIterations; ++loadRound) + { + auto particleLoadOffset = particleOffset + loadRound * maxChunkSize; + auto numLeftParticles = numParticles - loadRound * maxChunkSize; + + auto numParticlesCurrentBatch = std::min(numLeftParticles, maxChunkSize); + + log("openPMD: (begin) load species %1% round: %2%/%3%") % speciesName + % (loadRound + 1) % numLoadIterations; + if(numParticlesCurrentBatch != 0) + { + std::forward(forEachPatch)( + loadRound, + numParticlesCurrentBatch, + mappedFrame, + particleLoadOffset); + } + log("openPMD: ( end ) load species %1% round: %2%/%3%") % speciesName + % (loadRound + 1) % numLoadIterations; + } + } + } + + void loadFullMatches(std::deque const& fullMatches, ThreadParams* threadParams) const + { + auto [totalNumParticles, maxChunkSize] = numParticlesAndChunkSize(fullMatches); + loadMatchesGeneric( + fullMatches, + threadParams, + totalNumParticles, + maxChunkSize, + /* forEachPatch = */ + [this, threadParams]( + uint64_t loadRound, + uint64_t numParticlesCurrentBatch, + FrameType& mappedFrame, + uint64_t particleLoadOffset) + { + meta::ForEach< + typename NewParticleDescription::ValueTypeSeq, + LoadParticleAttributesFromOpenPMD> + loadAttributes; + loadAttributes( + threadParams, + mappedFrame, + particleSpecies, + particleLoadOffset, + numParticlesCurrentBatch); + + pmacc::particles::operations::splitIntoListOfFrames( + *speciesTmp, + mappedFrame, + numParticlesCurrentBatch, + cellOffsetToTotalDomain, + totalCellIdx_, + *threadParams->cellDescription, + picLog::INPUT_OUTPUT()); + }); + } + + void loadPartialMatches(std::deque const& partialMatches, ThreadParams* threadParams) const + { + auto [totalNumParticles, maxChunkSize] = numParticlesAndChunkSize(partialMatches); + + SubGrid const& subGrid = Environment::get().SubGrid(); + pmacc::Selection const localDomain = subGrid.getLocalDomain(); + pmacc::Selection const globalDomain = subGrid.getGlobalDomain(); + /* Offset to transform local particle offsets into total offsets for all particles within the + * current local domain. + * @attention A window can be the full simulation domain or the moving window. + */ + DataSpace localToTotalDomainOffset(localDomain.offset + globalDomain.offset); + + /* params->localWindowToDomainOffset is in PIConGPU for a restart zero but to stay generic we take + * the variable into account. 
+ */ + DataSpace const patchTotalOffset + = localToTotalDomainOffset + threadParams->localWindowToDomainOffset; + DataSpace const patchExtent = threadParams->window.localDimensions.size; + DataSpace const patchUpperCorner = patchTotalOffset + patchExtent; + + constexpr bool isMappedMemorySupported + = alpaka::hasMappedBufSupport<::alpaka::Platform>; + PMACC_VERIFY_MSG(isMappedMemorySupported, "Device must support mapped memory!"); + auto filter = alpaka::allocMappedBuf( + manager::Device::get().current(), + manager::Device::get().getPlatform(), + MemSpace(maxChunkSize).toAlpakaMemVec()); + auto remap = alpaka::allocMappedBuf( + manager::Device::get().current(), + manager::Device::get().getPlatform(), + MemSpace(maxChunkSize).toAlpakaMemVec()); + + loadMatchesGeneric( + partialMatches, + threadParams, + totalNumParticles, + maxChunkSize, /* forEachPatch = */ + [this, threadParams, &filter, &remap, &patchTotalOffset, &patchExtent, &patchUpperCorner]( + uint64_t loadRound, + uint64_t numParticlesCurrentBatch, + FrameType& mappedFrame, + uint64_t particleLoadOffset) + { + meta::ForEach< + typename NewParticleDescription::ValueTypeSeq, + LoadParticleAttributesFromOpenPMD> + loadAttributes; + loadAttributes( + threadParams, + mappedFrame, + particleSpecies, + particleLoadOffset, + numParticlesCurrentBatch); + + constexpr char filterKeep{1}, filterRemove{0}; + PMACC_LOCKSTEP_KERNEL(KernelFilterParticles{}) + .config(pmacc::math::Vector{numParticlesCurrentBatch})( + mappedFrame, + numParticlesCurrentBatch, + patchTotalOffset, + patchUpperCorner, + filterKeep, + filterRemove, + alpaka::getPtrNative(filter)); + eventSystem::getTransactionEvent().waitForFinished(); + + // For simplicity, do the remapping on the CPU again + MemIdxType remapCurrent = 0; + for(size_t particleIndex = 0; particleIndex < numParticlesCurrentBatch; ++particleIndex) + { + if(filter[particleIndex] == filterKeep) + { + remap[particleIndex] = remapCurrent++; + } + else + { + remap[particleIndex] = std::numeric_limits::max(); + } + } + + meta::ForEach< + typename NewParticleDescription::ValueTypeSeq, + RedistributeFilteredParticles> + redistributeFilteredParticles; + redistributeFilteredParticles( + mappedFrame, + filter, + remap, + numParticlesCurrentBatch, + filterRemove); + + log( + "openPMD: Keeping %1% of the current batch's %2% particles after filtering.") + % remapCurrent % numParticlesCurrentBatch; + + pmacc::particles::operations::splitIntoListOfFrames( + *speciesTmp, + mappedFrame, + remapCurrent, // !! not numParticlesCurrentBatch, filtered vs. 
unfiltered number + cellOffsetToTotalDomain, + totalCellIdx_, + *threadParams->cellDescription, + picLog::INPUT_OUTPUT()); + }); + } + }; + /** Load species from openPMD checkpoint storage * * @param params thread params @@ -81,7 +382,6 @@ namespace picongpu std::string const speciesName = FrameType::getName(); log("openPMD: (begin) load species: %1%") % speciesName; DataConnector& dc = Environment<>::get().DataConnector(); - GridController& gc = Environment::get().GridController(); ::openPMD::Series& series = *params->openPMDSeries; ::openPMD::Container<::openPMD::ParticleSpecies>& particles @@ -99,93 +399,52 @@ namespace picongpu // openPMD eventSystem::getTransactionEvent().waitForFinished(); - auto numRanks = gc.getGlobalSize(); - - size_t patchIdx = getPatchIdx(params, particleSpecies, numRanks); + auto [fullMatches, partialMatches] = getPatchIdx(params, particleSpecies); - std::shared_ptr fullParticlesInfoShared - = particleSpecies.particlePatches["numParticles"][::openPMD::RecordComponent::SCALAR] - .load(); + std::shared_ptr numParticlesShared + = particleSpecies.particlePatches["numParticles"].load(); + std::shared_ptr numParticlesOffsetShared + = particleSpecies.particlePatches["numParticlesOffset"].load(); particles.seriesFlush(); - uint64_t* fullParticlesInfo = fullParticlesInfoShared.get(); - - /* Run a prefix sum over the numParticles[0] element in - * particlesInfo to retreive the offset of particles - */ - uint64_t particleOffset = 0u; - /* count total number of particles on the device */ - uint64_t totalNumParticles = 0u; - - assert(patchIdx < numRanks); - - for(size_t i = 0u; i <= patchIdx; ++i) - { - if(i < patchIdx) - particleOffset += fullParticlesInfo[i]; - if(i == patchIdx) - totalNumParticles = fullParticlesInfo[i]; - } - - log("openPMD: Loading %1% particles from offset %2%") - % (long long unsigned) totalNumParticles % (long long unsigned) particleOffset; + uint64_t* patchNumParticles = numParticlesShared.get(); + uint64_t* patchNumParticlesOffset = numParticlesOffsetShared.get(); + + LoadParams lp{ + speciesName, + particleSpecies, + speciesTmp, + patchNumParticles, + patchNumParticlesOffset, + cellOffsetToTotalDomain, + restartChunkSize}; + lp.loadFullMatches(fullMatches, params); + lp.loadPartialMatches(partialMatches, params); - log("openPMD: malloc mapped memory: %1%") % speciesName; - - using FrameType = Frame; - using BufferType = Frame; - - BufferType buffers; - FrameType mappedFrame; - - uint64_t maxChunkSize = std::min(static_cast(restartChunkSize), totalNumParticles); - - /*malloc mapped memory*/ - meta::ForEach> - mallocMem; - mallocMem(buffers, mappedFrame, maxChunkSize); + log("openPMD: ( end ) load species: %1%") % speciesName; + } - uint32_t const numLoadIterations - = maxChunkSize == 0u ? 
0u : alpaka::core::divCeil(totalNumParticles, maxChunkSize); + private: + // o: offset, e: extent, u: upper corner (= o+e) + static std::pair, DataSpace> intersect( + DataSpace const& o1, + DataSpace const& e1, + DataSpace const& o2, + DataSpace const& e2) + { + // Convert extents into upper coordinates + auto u1 = o1 + e1; + auto u2 = o2 + e2; - for(uint64_t loadRound = 0u; loadRound < numLoadIterations; ++loadRound) + DataSpace intersect_o, intersect_u, intersect_e; + for(unsigned d = 0; d < simDim; ++d) { - auto particleLoadOffset = particleOffset + loadRound * maxChunkSize; - auto numLeftParticles = totalNumParticles - loadRound * maxChunkSize; - - auto numParticlesCurrentBatch = std::min(numLeftParticles, maxChunkSize); - - log("openPMD: (begin) load species %1% round: %2%/%3%") % speciesName - % (loadRound + 1) % numLoadIterations; - if(numParticlesCurrentBatch != 0) - { - meta::ForEach< - typename NewParticleDescription::ValueTypeSeq, - LoadParticleAttributesFromOpenPMD> - loadAttributes; - loadAttributes( - params, - mappedFrame, - particleSpecies, - particleLoadOffset, - numParticlesCurrentBatch); - - - pmacc::particles::operations::splitIntoListOfFrames( - *speciesTmp, - mappedFrame, - numParticlesCurrentBatch, - cellOffsetToTotalDomain, - totalCellIdx_, - *(params->cellDescription), - picLog::INPUT_OUTPUT()); - } - log("openPMD: ( end ) load species %1% round: %2%/%3%") % speciesName - % (loadRound + 1) % numLoadIterations; + intersect_o[d] = std::max(o1[d], o2[d]); + intersect_u[d] = std::min(u1[d], u2[d]); + intersect_e[d] = intersect_u[d] > intersect_o[d] ? intersect_u[d] - intersect_o[d] : 0; } - log("openPMD: ( end ) load species: %1%") % speciesName; + return {intersect_o, intersect_e}; } - private: /** get index for particle data within the openPMD patch data * * It is not possible to assume that we can use the MPI rank to load the particle data. 
@@ -197,13 +456,16 @@ namespace picongpu * * @return index of the particle patch within the openPMD data */ - HINLINE size_t - getPatchIdx(ThreadParams* params, ::openPMD::ParticleSpecies particleSpecies, size_t numRanks) + HINLINE std::pair, std::deque> getPatchIdx( + ThreadParams* params, + ::openPMD::ParticleSpecies particleSpecies) { std::string const name_lookup[] = {"x", "y", "z"}; - std::vector> offsets(numRanks); - std::vector> extents(numRanks); + size_t patches = particleSpecies.particlePatches["numParticles"].getExtent()[0]; + + std::vector> offsets(patches); + std::vector> extents(patches); // transform openPMD particle patch data into PIConGPU data objects for(uint32_t d = 0; d < simDim; ++d) @@ -213,7 +475,7 @@ namespace picongpu std::shared_ptr patchExtentsInfoShared = particleSpecies.particlePatches["extent"][name_lookup[d]].load(); particleSpecies.seriesFlush(); - for(size_t i = 0; i < numRanks; ++i) + for(size_t i = 0; i < patches; ++i) { offsets[i][d] = patchOffsetsInfoShared.get()[i]; extents[i][d] = patchExtentsInfoShared.get()[i]; @@ -235,18 +497,47 @@ namespace picongpu DataSpace const patchTotalOffset = localToTotalDomainOffset + params->localWindowToDomainOffset; DataSpace const patchExtent = params->window.localDimensions.size; + math::Vector true_; + for(unsigned d = 0; d < simDim; ++d) + { + true_[d] = true; + } // search the patch index based on the offset and extents of local domain size - for(size_t i = 0; i < numRanks; ++i) + std::deque fullMatches; + std::deque partialMatches; + size_t noMatches = 0; + for(size_t i = 0; i < patches; ++i) { - if(patchTotalOffset == offsets[i] && patchExtent == extents[i]) - return i; + // std::cout << "Comp.: " << patchTotalOffset << " - " << (patchTotalOffset + patchExtent) + // << "\tAGAINST " << offsets[i] << " - " << (offsets[i] + extents[i]) + // << "\toffsets: " << (patchTotalOffset <= offsets[i]) + // << ", extents: " << ((offsets[i] + extents[i]) <= (patchTotalOffset + + // patchExtent)) << + // '\n'; + if((patchTotalOffset <= offsets[i]) == true_ + && ((offsets[i] + extents[i]) <= (patchTotalOffset + patchExtent)) == true_) + { + fullMatches.emplace_back(i); + } + else if( + intersect(offsets[i], extents[i], patchTotalOffset, patchExtent).second.productOfComponents() + != 0) + { + partialMatches.emplace_back(i); + } + else + { + ++noMatches; + } } - // If no patch fits the conditions, something went wrong before - throw std::runtime_error( - "Error while restarting: no particle patch matches the required offset and extent"); - // Fake return still needed to avoid warnings - return 0; + + log( + "openPMD: Found %1% fully and %2% partially matching particle patch(es). 
%3% " + "patch was / patches were not matched.") + % fullMatches.size() % partialMatches.size() % noMatches; + + return std::make_pair(std::move(fullMatches), std::move(partialMatches)); } }; diff --git a/include/picongpu/plugins/openPMD/restart/RestartFieldLoader.hpp b/include/picongpu/plugins/openPMD/restart/RestartFieldLoader.hpp index 939d65b3e0..ec66a6ee0b 100644 --- a/include/picongpu/plugins/openPMD/restart/RestartFieldLoader.hpp +++ b/include/picongpu/plugins/openPMD/restart/RestartFieldLoader.hpp @@ -77,6 +77,9 @@ namespace picongpu DataSpace local_domain_size = params->window.localDimensions.size; bool useLinearIdxAsDestination = false; + ::openPMD::Series& series = *params->openPMDSeries; + ::openPMD::Mesh& mesh = series.iterations[currentStep].open().meshes[objectName]; + /* Patch for non-domain-bound fields * This is an ugly fix to allow output of reduced 1d PML buffers */ @@ -85,6 +88,7 @@ namespace picongpu auto const field_layout = field.getGridLayout(); auto const field_no_guard = field_layout.sizeWithoutGuardND(); auto const elementCount = field_no_guard.productOfComponents(); + uint64_t pmlTotalSize = 0; /* Scan the PML buffer local size along all local domains * This code is symmetric to one in Field::writeField() @@ -112,9 +116,20 @@ namespace picongpu { if(localSizes.at(2u * r + 1u) < rank) domainOffset += localSizes.at(2u * r); + pmlTotalSize += localSizes.at(2u * r); } log("openPMD: (end) collect PML sizes for %1%") % objectName; + if(auto const& extentOnDisk = mesh.begin()->second.getExtent(); + extentOnDisk != ::openPMD::Extent{1, 1, pmlTotalSize}) + { + log( + "openPMD: Skip loading for PML fields. Expecting extent %1%, found extent %2% on disk. " + "This may happen when restarting with a different domain decomposition.") + % pmlTotalSize % extentOnDisk.at(2); + return; + } + domain_offset = DataSpace::create(0); domain_offset[0] = domainOffset; local_domain_size = DataSpace::create(1); @@ -122,9 +137,6 @@ namespace picongpu useLinearIdxAsDestination = true; } - ::openPMD::Series& series = *params->openPMDSeries; - ::openPMD::Container<::openPMD::Mesh>& meshes = series.iterations[currentStep].open().meshes; - auto destBox = field.getHostBuffer().getDataBox(); for(uint32_t n = 0; n < numComponents; ++n) { @@ -133,9 +145,8 @@ namespace picongpu // data. log("openPMD: Read from domain: offset=%1% size=%2%") % domain_offset % local_domain_size; - ::openPMD::RecordComponent rc = numComponents > 1 - ? meshes[objectName][name_lookup_tpl[n]] - : meshes[objectName][::openPMD::RecordComponent::SCALAR]; + ::openPMD::RecordComponent rc + = numComponents > 1 ? mesh[name_lookup_tpl[n]] : mesh[::openPMD::RecordComponent::SCALAR]; log("openPMD: Read from field '%1%'") % objectName; @@ -158,7 +169,7 @@ namespace picongpu std::shared_ptr field_container = rc.loadChunk(start, count); /* start a blocking read of all scheduled variables */ - meshes.seriesFlush(); + mesh.seriesFlush(); int const elementCount = local_domain_size.productOfComponents(); @@ -219,6 +230,7 @@ namespace picongpu /* load from openPMD */ bool const isDomainBound = traits::IsFieldDomainBound::value; + RestartFieldLoader::loadField( field->getGridBuffer(), (uint32_t) T_Field::numComponents,
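The chunked-load bookkeeping in loadMatchesGeneric (rounds of at most restartChunkSize particles, with a shorter final round) reduces to the following host-side sketch. divCeil is written out explicitly here instead of alpaka::core::divCeil, and the particle counts are arbitrary example numbers.

#include <algorithm>
#include <cstdint>
#include <iostream>

int main()
{
    std::uint64_t const numParticles = 10'500;
    std::uint64_t const maxChunkSize = 4'096;

    // zero chunk size means there is nothing to load for this patch
    std::uint64_t const rounds
        = maxChunkSize == 0u ? 0u : (numParticles + maxChunkSize - 1u) / maxChunkSize;
    for(std::uint64_t round = 0; round < rounds; ++round)
    {
        auto const offset = round * maxChunkSize;
        auto const batch = std::min(numParticles - offset, maxChunkSize);
        std::cout << "round " << (round + 1) << '/' << rounds << ": load " << batch
                  << " particles starting at offset " << offset << '\n';
    }
}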
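The heart of the new getPatchIdx is the classification of each checkpoint particle patch against the local window: fully contained, partially overlapping, or disjoint. Below is a minimal self-contained sketch of that logic; Box, classify and the example coordinates are illustrative stand-ins, not PIConGPU types.

#include <algorithm>
#include <array>
#include <cstdint>
#include <iostream>

constexpr unsigned simDim = 3u;
using Vec = std::array<std::int64_t, simDim>;

struct Box
{
    Vec offset;
    Vec extent;
};

enum class Match { full, partial, none };

// Component-wise intersection of two axis-aligned boxes; a zero extent means no overlap.
Box intersect(Box const& a, Box const& b)
{
    Box r{};
    for(unsigned d = 0; d < simDim; ++d)
    {
        auto lo = std::max(a.offset[d], b.offset[d]);
        auto hi = std::min(a.offset[d] + a.extent[d], b.offset[d] + b.extent[d]);
        r.offset[d] = lo;
        r.extent[d] = hi > lo ? hi - lo : 0;
    }
    return r;
}

Match classify(Box const& patch, Box const& window)
{
    bool fullyInside = true;
    for(unsigned d = 0; d < simDim; ++d)
        fullyInside = fullyInside && window.offset[d] <= patch.offset[d]
            && patch.offset[d] + patch.extent[d] <= window.offset[d] + window.extent[d];
    if(fullyInside)
        return Match::full;

    auto overlap = intersect(patch, window);
    std::int64_t cells = 1;
    for(unsigned d = 0; d < simDim; ++d)
        cells *= overlap.extent[d];
    return cells != 0 ? Match::partial : Match::none;
}

int main()
{
    Box const window{{0, 0, 0}, {64, 64, 64}};
    std::cout << int(classify(Box{{0, 0, 0}, {32, 64, 64}}, window)) << '\n';  // 0: full
    std::cout << int(classify(Box{{48, 0, 0}, {32, 64, 64}}, window)) << '\n'; // 1: partial
    std::cout << int(classify(Box{{64, 0, 0}, {32, 64, 64}}, window)) << '\n'; // 2: none
}

Fully contained patches can be loaded wholesale as before, while partially overlapping ones are the new case that requires per-particle filtering; disjoint patches are only counted for the diagnostic log message.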
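For partially matching patches, the diff applies a filter/remap/redistribute scheme: flag each loaded particle as keep or remove depending on whether its total cell index lies inside the local patch, build a remap table from a running count of kept particles, then compact every attribute array in place. The following host-only sketch shows the same three steps on simplified data; the names and the single 1D attribute are stand-ins for the frame/identifier machinery, and the GPU filter kernel is replaced by a serial loop.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <limits>
#include <vector>

int main()
{
    constexpr char filterKeep = 1, filterRemove = 0;
    std::uint64_t const patchLower = 10, patchUpper = 20; // local patch as [lower, upper) in one dimension

    std::vector<std::uint64_t> totalCellIdx{3, 12, 19, 25, 11}; // loaded particle cell indices
    std::vector<double> weighting{1., 2., 3., 4., 5.};          // one attribute per particle
    std::size_t const n = totalCellIdx.size();

    // 1) filter: flag particles that fall inside the local patch
    std::vector<char> filter(n);
    for(std::size_t i = 0; i < n; ++i)
        filter[i] = (totalCellIdx[i] >= patchLower && totalCellIdx[i] < patchUpper) ? filterKeep : filterRemove;

    // 2) remap: running index over kept particles (the serial prefix count done on the CPU in the diff)
    std::vector<std::size_t> remap(n);
    std::size_t kept = 0;
    for(std::size_t i = 0; i < n; ++i)
        remap[i] = (filter[i] == filterKeep) ? kept++ : std::numeric_limits<std::size_t>::max();

    // 3) redistribute: compact each attribute in place; kept entries only move to lower
    //    indices, so copying front to back never overwrites data that is still needed
    for(std::size_t i = 0; i < n; ++i)
        if(filter[i] == filterKeep)
        {
            totalCellIdx[remap[i]] = totalCellIdx[i];
            weighting[remap[i]] = weighting[i];
        }

    std::cout << "kept " << kept << " of " << n << " particles\n"; // kept 3 of 5 particles
}

After compaction, only the first `kept` entries of the mapped frame are handed to splitIntoListOfFrames, which is why the diff passes remapCurrent rather than numParticlesCurrentBatch there.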
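Finally, the RestartFieldLoader change guards the PML load with an extent check: the expected flattened 1D size is summed over all ranks and compared against the dataset extent found in the checkpoint, and the load is skipped on mismatch so the PML buffers keep their freshly initialized values. A small sketch of that comparison, with Extent standing in for ::openPMD::Extent and made-up per-rank sizes:

#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

using Extent = std::vector<std::uint64_t>;

bool pmlExtentMatches(std::vector<std::uint64_t> const& perRankPmlSizes, Extent const& extentOnDisk)
{
    std::uint64_t const pmlTotalSize
        = std::accumulate(perRankPmlSizes.begin(), perRankPmlSizes.end(), std::uint64_t{0});
    // PIConGPU stores the reduced PML buffers as a {1, 1, N} dataset
    return extentOnDisk == Extent{1, 1, pmlTotalSize};
}

int main()
{
    std::vector<std::uint64_t> perRankPmlSizes{1200, 1180, 1210, 1190};
    std::cout << std::boolalpha;
    std::cout << pmlExtentMatches(perRankPmlSizes, Extent{1, 1, 4780}) << '\n'; // true: safe to load
    std::cout << pmlExtentMatches(perRankPmlSizes, Extent{1, 1, 5000}) << '\n'; // false: skip the load
}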