diff --git a/CMakeLists.txt b/CMakeLists.txt
index 890cbaf27f..7a2b99ae8d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1014,6 +1014,54 @@ if(DEPTHAI_BASALT_SUPPORT)
     endif()
 endif()
 
+########################
+# Extra host nodes
+########################
+set(TARGET_EXTRA_ALIAS hostNodesExt)
+set(TARGET_EXTRA_NAME ${PROJECT_NAME}-${TARGET_EXTRA_ALIAS})
+if (DEPTHAI_BUILD_EXT_HOST_NODES)
+    set(TARGET_EXT_NODES_SOURCES
+        ParserGenerator.cpp
+        host_nodes_ext/ParsingNeuralNetwork.cpp
+        host_nodes_ext/parsers/BaseParser.cpp
+        host_nodes_ext/parsers/SimCCKeypointParser.cpp
+        host_nodes_ext/parsers/KeypointParser.cpp
+        host_nodes_ext/messages/Keypoints.cpp
+    )
+    list(TRANSFORM TARGET_EXT_NODES_SOURCES PREPEND "src/pipeline/node/host/contrib/")
+#
+
+    # Add depthai-hostNodesExt library and importable target/alias depthai::hostNodesExt
+    add_library(${TARGET_EXTRA_NAME} ${TARGET_EXT_NODES_SOURCES})
+    add_library("${PROJECT_NAME}::${TARGET_EXTRA_ALIAS}" ALIAS ${TARGET_EXTRA_NAME})
+    set_target_properties(${TARGET_EXTRA_NAME} PROPERTIES EXPORT_NAME ${TARGET_EXTRA_ALIAS})
+
+
+    target_include_directories(${TARGET_EXTRA_NAME}
+        PUBLIC
+            "$"
+
+#            "$"
+#            "$"
+            "$"
+        PRIVATE
+            "$"
+            "$"
+            "$"
+            "$"
+    )
+
+    target_link_libraries(${TARGET_EXTRA_NAME}
+        PRIVATE
+            spdlog::spdlog
+            xtensor
+        PUBLIC
+            depthai::core
+    )
+
+    list(APPEND targets_to_export ${TARGET_EXTRA_NAME})
+endif ()
+
 ########################
 # Combined target
 ########################
diff --git a/cmake/depthaiOptions.cmake b/cmake/depthaiOptions.cmake
index 7ec18f4e53..5adb55c94a 100644
--- a/cmake/depthaiOptions.cmake
+++ b/cmake/depthaiOptions.cmake
@@ -34,6 +34,7 @@ option(DEPTHAI_BUILD_TESTS "Build tests" OFF)
 option(DEPTHAI_BUILD_EXAMPLES "Build examples - Requires OpenCV library to be installed" OFF)
 option(DEPTHAI_BUILD_DOCS "Build documentation - requires doxygen to be installed" OFF)
 option(DEPTHAI_BUILD_ZOO_HELPER "Build the Zoo helper" OFF)
+option(DEPTHAI_BUILD_EXT_HOST_NODES "Build the contrib host nodes lib" OFF)
 option(DEPTHAI_NEW_FIND_PYTHON "Use new FindPython module" ON)
 option(DEPTHAI_INSTALL "Enable install target for depthai-core targets" ON)
 
diff --git a/examples/cpp/HostNodes/CMakeLists.txt b/examples/cpp/HostNodes/CMakeLists.txt
index 94bddf5433..d5a8291152 100644
--- a/examples/cpp/HostNodes/CMakeLists.txt
+++ b/examples/cpp/HostNodes/CMakeLists.txt
@@ -24,4 +24,8 @@ dai_set_example_test_labels(threaded_host_node ondevice rvc2_all rvc4 ci)
 
 dai_add_example(host_pipeline_synced_node host_pipeline_synced_node.cpp ON OFF)
 dai_set_example_test_labels(host_pipeline_synced_node ondevice rvc2_all rvc4 ci)
-dai_add_example(host_only_camera host_only_camera.cpp OFF OFF)
\ No newline at end of file
+dai_add_example(host_only_camera host_only_camera.cpp OFF OFF)
+
+if(DEPTHAI_BUILD_EXT_HOST_NODES)
+    add_subdirectory(HostNodesExtended)
+endif ()
\ No newline at end of file
diff --git a/examples/cpp/HostNodes/HostNodesExtended/CMakeLists.txt b/examples/cpp/HostNodes/HostNodesExtended/CMakeLists.txt
new file mode 100644
index 0000000000..679ea1580e
--- /dev/null
+++ b/examples/cpp/HostNodes/HostNodesExtended/CMakeLists.txt
@@ -0,0 +1,3 @@
+dai_add_example(simcc_parser simcc_parser.cpp ON OFF)
+dai_set_example_test_labels(simcc_parser ondevice rvc4 ci)
+target_link_libraries(simcc_parser PRIVATE utility depthai::hostNodesExt)
diff --git a/examples/cpp/HostNodes/HostNodesExtended/simcc_parser.cpp b/examples/cpp/HostNodes/HostNodesExtended/simcc_parser.cpp
new file mode 100644
index 0000000000..ba8c2cbb59
--- /dev/null
+++ b/examples/cpp/HostNodes/HostNodesExtended/simcc_parser.cpp
@@ -0,0 +1,30 @@
+#include "depthai/pipeline/node/host/contrib/host_nodes_ext/ParsingNeuralNetwork.hpp"
+#include "messages/Keypoints.hpp"
+
+int main() {
+
+    // Create device
+    std::shared_ptr<dai::Device> device = std::make_shared<dai::Device>();
+
+    // Create pipeline
+    dai::Pipeline pipeline(device);
+
+    // Create nodes
+    auto camera = pipeline.create<dai::node::Camera>()->build();
+    auto output = camera->requestOutput(std::make_pair(288, 384), dai::ImgFrame::Type::BGR888i, dai::ImgResizeMode::STRETCH);
+
+    dai::NNModelDescription modelDescription{.model = "pedestl/rtmpose3d-open-mmlab-mmpose-large:v1-0-0:latest", .platform = pipeline.getDefaultDevice()->getPlatformAsString()};
+    auto archive = dai::NNArchive(dai::getModelFromZoo(modelDescription));
+
+    auto parsed_nn = pipeline.create<dai::node::ParsingNeuralNetwork>()->build(*output, archive);
+    // Create output queue, note that parsed_nn->out only exists if the NN has only a single parser head
+    auto out = parsed_nn->out.value().get().createOutputQueue();
+
+    // Start pipeline
+    pipeline.start();
+    pipeline.processTasks();
+    auto msg = out->get();
+    std::shared_ptr<dai::Keypoints3D3C> keypoints = std::dynamic_pointer_cast<dai::Keypoints3D3C>(msg);
+    pipeline.stop();
+    assert(keypoints != nullptr);
+}
diff --git a/include/depthai/pipeline/node/host/contrib/ParserGenerator.hpp b/include/depthai/pipeline/node/host/contrib/ParserGenerator.hpp
new file mode 100644
index 0000000000..168fd7b2a3
--- /dev/null
+++ b/include/depthai/pipeline/node/host/contrib/ParserGenerator.hpp
@@ -0,0 +1,35 @@
+//
+// Created by thwdpc on 7/24/25.
+//
+
+#pragma once
+#include <variant>
+
+#include "depthai/depthai.hpp"
+#include "host_nodes_ext/parsers/BaseParser.hpp"
+
+namespace dai::node {
+
+typedef std::variant<std::shared_ptr<BaseParser>, std::shared_ptr<DetectionParser>> HostOrDeviceParser;
+template <typename T>
+constexpr bool all_alternatives_shared_ptr = false;
+template <typename... Ts>
+constexpr bool all_alternatives_shared_ptr<std::variant<Ts...>> =
+    (std::conjunction_v<std::is_same<std::shared_ptr<typename Ts::element_type>, Ts>...>);
+static_assert(all_alternatives_shared_ptr<HostOrDeviceParser>, "All alternatives must be std::shared_ptr");
+
+struct ConfigModelWithHeads {
+    nn_archive::v1::Model model;
+    std::vector<nn_archive::v1::Head> heads;
+};
+
+class ParserGenerator {
+   public:
+    static std::vector<HostOrDeviceParser> generateAllParsers(Pipeline pipeline, const NNArchive& nnArchive, bool hostOnly = false);
+
+   private:
+    static ConfigModelWithHeads archiveGetModelEnsureOneHeadV1(const NNArchive& nnArchive, Platform targetPlatform);
+    static HostOrDeviceParser generateOneV1Parser(
+        Pipeline& pipeline, const NNArchive& owningArchive, const nn_archive::v1::Head& head, const nn_archive::v1::Model& model, bool hostOnly = false);
+};
+}  // namespace dai::node
diff --git a/include/depthai/pipeline/node/host/contrib/README.md b/include/depthai/pipeline/node/host/contrib/README.md
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/include/depthai/pipeline/node/host/contrib/host_nodes_ext/ParsingNeuralNetwork.hpp b/include/depthai/pipeline/node/host/contrib/host_nodes_ext/ParsingNeuralNetwork.hpp
new file mode 100644
index 0000000000..9af0e992b2
--- /dev/null
+++ b/include/depthai/pipeline/node/host/contrib/host_nodes_ext/ParsingNeuralNetwork.hpp
@@ -0,0 +1,50 @@
+//
+// Created by thwdpc on 7/24/25.
+//
+#pragma once
+#include <functional>
+#include <optional>
+
+#include "../ParserGenerator.hpp"
+#include "depthai/depthai.hpp"
+#include "parsers/BaseParser.hpp"
+#include "parsers/KeypointParser.hpp"
+
+namespace dai::node {
+
+class ParsingNeuralNetwork : public CustomThreadedNode<ParsingNeuralNetwork> {
+   public:
+    std::shared_ptr<ParsingNeuralNetwork> build(Output& input, const NNArchive& nnArchive);
+    std::shared_ptr<ParsingNeuralNetwork> build(const std::shared_ptr<Camera>& input, NNModelDescription modelDesc, std::optional<float> fps = std::nullopt);
+    std::shared_ptr<ParsingNeuralNetwork> build(const std::shared_ptr<Camera>& input, const NNArchive& nnArchive, std::optional<float> fps = std::nullopt);
+
+    InputMap& inputs = nn->inputs;
+    Input& input = nn->input;
+    std::optional<std::reference_wrapper<Output>> out = std::nullopt;
+    Output& passthrough = nn->passthrough;
+    OutputMap& passthroughs = nn->passthroughs;
+
+    template <typename T>
+    std::optional<size_t> getIndexOfFirstParserOfType() const {
+        const auto which = std::find_if(parsers.begin(), parsers.end(), [](const auto& p) {
+            return std::visit([](auto& anyP) { return std::dynamic_pointer_cast<T>(anyP) != nullptr; }, p);
+        });
+        return which == parsers.end() ? std::nullopt : static_cast<std::optional<size_t>>(std::distance(parsers.begin(), which));
+    }
+
+    void run() override;
+
+   private:
+    std::vector<HostOrDeviceParser> getParserNodes(const NNArchive& nnArchive);
+
+    void updateParsers(const NNArchive& nnArchive);
+
+    void removeOldParserNodes();
+    std::shared_ptr<NeuralNetwork> nn;
+    std::optional<Subnode<Sync>> parserSync = std::nullopt;
+
+   protected:
+    std::vector<HostOrDeviceParser> parsers;
+};
+
+}  // namespace dai::node
diff --git a/include/depthai/pipeline/node/host/contrib/host_nodes_ext/messages/Keypoints.hpp b/include/depthai/pipeline/node/host/contrib/host_nodes_ext/messages/Keypoints.hpp
new file mode 100644
index 0000000000..f53b5babf0
--- /dev/null
+++ b/include/depthai/pipeline/node/host/contrib/host_nodes_ext/messages/Keypoints.hpp
@@ -0,0 +1,56 @@
+//
+// Created by thwdpc on 7/28/25.
+//
+
+#pragma once
+#include <optional>
+#include <depthai/depthai.hpp>
+#include <xtensor/containers/xarray.hpp>
+
+namespace dai {
+
+struct ValueWithConfidence {
+    float_t value;
+    float_t confidence;
+};
+
+// Per-dimension confidence, strictly [v0, c0, v1, c1, ...]
+template <std::size_t D>
+struct KeypointPerDimConfidence {
+    ValueWithConfidence data[D];  // value, confidence, value, confidence, ...
+    static constexpr std::size_t value = 2 * D;  // value,conf xD
+};
+
+// Per-keypoint confidence, strictly [v0, v1, v2 ... confidence]
+template <std::size_t D>
+struct KeypointPerKeypointConfidence {
+    float_t values[D];
+    float_t confidence;
+    static constexpr std::size_t value = D + 1;  // D values + 1 confidence
+};
+
+using Keypoint2D2C = KeypointPerDimConfidence<2>;
+using Keypoint2D1C = KeypointPerKeypointConfidence<2>;
+using Keypoint3D3C = KeypointPerDimConfidence<3>;
+using Keypoint3D1C = KeypointPerKeypointConfidence<3>;
+
+template <typename KP>
+class Keypoints : public Buffer {
+   public:
+    std::optional<ImgTransformation> transformation;
+
+    std::vector<KP> kpVec;
+
+    Keypoints(std::shared_ptr<NNData>&& other, xt::xarray<float>&& planarStackedKeypoints);
+};
+
+template class Keypoints<Keypoint2D1C>;
+typedef Keypoints<Keypoint2D1C> Keypoints2D;
+template class Keypoints<Keypoint2D2C>;
+typedef Keypoints<Keypoint2D2C> Keypoints2D2C;
+
+template class Keypoints<Keypoint3D1C>;
+typedef Keypoints<Keypoint3D1C> Keypoints3D;
+template class Keypoints<Keypoint3D3C>;
+typedef Keypoints<Keypoint3D3C> Keypoints3D3C;
+}  // namespace dai
\ No newline at end of file
diff --git a/include/depthai/pipeline/node/host/contrib/host_nodes_ext/parsers/BaseParser.hpp b/include/depthai/pipeline/node/host/contrib/host_nodes_ext/parsers/BaseParser.hpp
new file mode 100644
index 0000000000..008fc743b4
--- /dev/null
+++ b/include/depthai/pipeline/node/host/contrib/host_nodes_ext/parsers/BaseParser.hpp
@@ -0,0 +1,37 @@
+//
+// Created by thwdpc on 7/24/25.
+//
+
+#pragma once
+#include "depthai/depthai.hpp"
+
+namespace dai::node {
+class BaseParser : public ThreadedHostNode {
+   public:
+    Input input{*this, {"in", DEFAULT_GROUP, true, 5, {{{DatatypeEnum::NNData, true}}}, true}};
+    Output out{*this, {"out", DEFAULT_GROUP, {{{DatatypeEnum::Buffer, true}}}}};
+    virtual std::shared_ptr<BaseParser> build(const nn_archive::v1::Head& head, const nn_archive::v1::Model& model);
+
+   protected:
+    virtual void buildImpl(const nn_archive::v1::Head& head, const nn_archive::v1::Model& model) = 0;
+};
+
+/**
+ * @brief Custom node for parser. When creating a custom parser, inherit from this class!
+ * @tparam T Node type (same as the class you are creating)
+ *
+ * Example:
+ * @code{.cpp}
+ * class MyParser : public CustomParser<MyParser> {
+ *     void buildImpl(const nn_archive::v1::Head& head, const nn_archive::v1::Model& model) override {
+ *         // read parser-specific metadata from the head
+ *     }
+ *     void run() override {
+ *         // get NNData from `input`, parse it, send a message on `out`
+ *     }
+ * };
+ * @endcode
+ */
+template <typename T>
+using CustomParser = NodeCRTP<BaseParser, T>;
+}  // namespace dai::node
diff --git a/include/depthai/pipeline/node/host/contrib/host_nodes_ext/parsers/KeypointParser.hpp b/include/depthai/pipeline/node/host/contrib/host_nodes_ext/parsers/KeypointParser.hpp
new file mode 100644
index 0000000000..fca49016f3
--- /dev/null
+++ b/include/depthai/pipeline/node/host/contrib/host_nodes_ext/parsers/KeypointParser.hpp
@@ -0,0 +1,30 @@
+//
+// Created by thwdpc on 7/25/25.
+//
+
+#pragma once
+#include "BaseParser.hpp"
+
+namespace dai::node {
+
+enum class ValuesPerKeypoint : uint8_t {
+    Two = 2,
+    Three = 3
+};
+
+class KeypointParser : virtual public CustomParser<KeypointParser> {
+public:
+    constexpr static const char* NAME = "KeypointParser";
+
+protected:
+    void buildImpl(const nn_archive::v1::Head& head, const nn_archive::v1::Model& model) override;
+    void run() override;
+
+    std::vector<nn_archive::v1::Output> keypointsOutputs{};
+    uint16_t nKeypoints = 17;
+    // dimensionality: 2D or 3D
+    ValuesPerKeypoint valuesPerKeypoint = ValuesPerKeypoint::Two;
+    std::vector<std::string> keypointNames{};
+    std::vector<std::pair<uint16_t, uint16_t>> skeletonEdges{};
+};
+}
\ No newline at end of file
diff --git a/include/depthai/pipeline/node/host/contrib/host_nodes_ext/parsers/SimCCKeypointParser.hpp b/include/depthai/pipeline/node/host/contrib/host_nodes_ext/parsers/SimCCKeypointParser.hpp
new file mode 100644
index 0000000000..fb25f96e94
--- /dev/null
+++ b/include/depthai/pipeline/node/host/contrib/host_nodes_ext/parsers/SimCCKeypointParser.hpp
@@ -0,0 +1,32 @@
+
+#pragma once
+#include "KeypointParser.hpp"
+#include "parsers/BaseParser.hpp"
+
+namespace dai::node {
+
+
+class SimCCKeypointParser final : public NodeCRTP<KeypointParser, SimCCKeypointParser> {
+   public:
+    constexpr static const char* NAME = "SimCCKeypointParser";
+
+   protected:
+    void buildImpl(const nn_archive::v1::Head& head, const nn_archive::v1::Model& model) override;
+    void run() override;
+    void foggyGuessesForOneDim(const nn_archive::v1::Head& head,
+                               const nn_archive::v1::Model& model,
+                               const nn_archive::v1::Input& imgInput,
+                               const std::pair<std::optional<int64_t>, std::optional<int64_t>>& imgWHMaybe);
+    void inferConfigFromMultipleOutputs(const nn_archive::v1::Head& head,
+                                        const nn_archive::v1::Model& model,
+                                        const nn_archive::v1::Input& imgInput,
+                                        std::pair<std::optional<int64_t>, std::optional<int64_t>>& imgWidthHeight);
+
+    uint8_t pixelSubdivisions = 2;
+    // Populated if the keypoint # dim and the XY(Z) dimensionality are collapsed (like yolo). Stores whether the collapsed dim is interleaved (x1, y1, z1, x2, y2, z2 ..)
+    // or planar (x1, x2 .. y1, y2 .. z1, z2)
+    std::optional<bool> collapsedDimsAreInterleaved = std::nullopt;
+    bool replicateXDimToZDim = true;
+    std::vector<std::size_t> simCCDimLengths;
+};
+}  // namespace dai::node
diff --git a/src/pipeline/node/host/contrib/ParserGenerator.cpp b/src/pipeline/node/host/contrib/ParserGenerator.cpp
new file mode 100644
index 0000000000..2a59f1d5ab
--- /dev/null
+++ b/src/pipeline/node/host/contrib/ParserGenerator.cpp
@@ -0,0 +1,84 @@
+//
+// Created by thwdpc on 7/24/25.
+//
+
+#include "../ParserGenerator.hpp"
+
+#include <functional>
+#include <string>
+#include <unordered_map>
+
+#include "utility/ErrorMacros.hpp"
+#include "parsers/SimCCKeypointParser.hpp"
+
+namespace dai::node {
+
+// Utility for device parser names
+const std::vector<std::string> DEVICE_PARSERS = {"YOLO", "SSD"};
+
+static const std::unordered_map<std::string, std::function<std::shared_ptr<BaseParser>(Pipeline&)>> parserMap = {
+    // { "YOLOExtendedParser", [](){ return std::make_shared<YOLOExtendedParser>(); } },
+    { std::string(KeypointParser::NAME), [](Pipeline& p) { return std::static_pointer_cast<BaseParser>(p.create<KeypointParser>()); }},
+    { std::string(SimCCKeypointParser::NAME), [](Pipeline& p) { return std::static_pointer_cast<BaseParser>(p.create<SimCCKeypointParser>()); }}
+};
+
+std::shared_ptr<BaseParser> getHostParserByName(const std::string& parserName, Pipeline& pipeline) {
+    std::string parserNameExtended;
+    if(parserName == "YOLO") {
+        parserNameExtended = "YOLOExtendedParser";
+    } else if(parserName == "SSD") {
+        parserNameExtended = "SSDExtendedParser";
+    } else {
+        parserNameExtended = parserName;
+    }
+    DAI_CHECK(parserMap.find(parserNameExtended) != parserMap.end(), "Parser " + parserNameExtended + " not found");
+    return parserMap.find(parserNameExtended)->second(pipeline);
+}
+
+std::vector<HostOrDeviceParser> ParserGenerator::generateAllParsers(Pipeline pipeline, const NNArchive& nnArchive, const bool hostOnly) {
+    auto [model, heads] = archiveGetModelEnsureOneHeadV1(nnArchive, pipeline.getDefaultDevice()->getPlatform());
+
+    std::vector<HostOrDeviceParser> parsers;
+
+    for(int i = 0; i < heads.size(); i++) {
+        HostOrDeviceParser parser = generateOneV1Parser(pipeline, nnArchive, heads[i], model, hostOnly);
+        parsers.push_back(std::move(parser));
+    }
+    return parsers;
+}
+
+
+ConfigModelWithHeads ParserGenerator::archiveGetModelEnsureOneHeadV1(const NNArchive& nnArchive, const Platform targetPlatform) {
+    const auto& nnArchiveCfg = nnArchive.getVersionedConfig();
+
+    DAI_CHECK_V(nnArchiveCfg.getVersion() == NNArchiveConfigVersion::V1, "Only V1 configs are supported for NeuralNetwork.build method");
+    auto supportedPlatforms = nnArchive.getSupportedPlatforms();
+    bool platformSupported = std::find(supportedPlatforms.begin(), supportedPlatforms.end(), targetPlatform) != supportedPlatforms.end();
+    DAI_CHECK_V(platformSupported, "Platform not supported by the neural network model");
+
+    // Get model heads
+    auto [_, model] = nnArchive.getConfig();
+
+    if(const auto headsOpt = model.heads) {
+        if(const auto headsV1 = *headsOpt; !headsV1.empty()) {
+            return ConfigModelWithHeads{.model = model, .heads = headsV1};
+        }
+    }
+    throw std::runtime_error(fmt::format("No heads defined in the NN Archive."));
+}
+
+HostOrDeviceParser ParserGenerator::generateOneV1Parser(
+    Pipeline& pipeline, const NNArchive& owningArchive, const nn_archive::v1::Head& head, const nn_archive::v1::Model& model, const bool hostOnly) {
+    std::string parser_name = head.parser;
+
+    // If this *could* be an on-device parser (currently just DetectionParser) then check whether that's allowed by !hostOnly
+    if(std::find(DEVICE_PARSERS.begin(), DEVICE_PARSERS.end(), parser_name) != DEVICE_PARSERS.end() && !hostOnly) {
+        // Device parser handling
+        auto device_parser = pipeline.create<DetectionParser>();
+        device_parser->setNNArchive(owningArchive);
+        return device_parser;
+    }
+    return getHostParserByName(parser_name, pipeline)->build(head, model);
+}
+
+}  // namespace dai::node
diff --git a/src/pipeline/node/host/contrib/host_nodes_ext/ParsingNeuralNetwork.cpp b/src/pipeline/node/host/contrib/host_nodes_ext/ParsingNeuralNetwork.cpp
new file mode 100644
index 0000000000..7982cd9711
--- /dev/null
+++ b/src/pipeline/node/host/contrib/host_nodes_ext/ParsingNeuralNetwork.cpp
@@ -0,0 +1,94 @@
+//
+// Created by thwdpc on 7/24/25.
+//
+
+#include "ParsingNeuralNetwork.hpp"
+
+#include <iostream>
+#include <memory>
+#include <optional>
+#include <utility>
+#include <vector>
+namespace dai::node {
+
+std::shared_ptr<ParsingNeuralNetwork> ParsingNeuralNetwork::build(Output& input, const NNArchive& nnArchive) {
+    nn = getParentPipeline().create<NeuralNetwork>();
+    nn->build(input, nnArchive);
+    updateParsers(nnArchive);
+    return std::static_pointer_cast<ParsingNeuralNetwork>(shared_from_this());
+}
+
+std::shared_ptr<ParsingNeuralNetwork> ParsingNeuralNetwork::build(const std::shared_ptr<Camera>& input,
+                                                                  NNModelDescription modelDesc,
+                                                                  const std::optional<float> fps) {
+    nn = getParentPipeline().create<NeuralNetwork>();
+    nn->build(input, std::move(modelDesc), fps);
+    try {
+        const NNArchive& archive = nn->getNNArchive().value();
+        updateParsers(archive);
+    } catch(std::bad_optional_access& e) {
+        std::cout << "NeuralNetwork node did not produce an NNArchive (getNNArchive() returned std::nullopt): " << e.what() << std::endl;
+    }
+    return std::static_pointer_cast<ParsingNeuralNetwork>(shared_from_this());
+}
+
+std::shared_ptr<ParsingNeuralNetwork> ParsingNeuralNetwork::build(const std::shared_ptr<Camera>& input,
+                                                                  const NNArchive& nnArchive,
+                                                                  const std::optional<float> fps) {
+    nn = getParentPipeline().create<NeuralNetwork>();
+    nn->build(input, nnArchive, fps);
+    updateParsers(nnArchive);
+    return std::static_pointer_cast<ParsingNeuralNetwork>(shared_from_this());
+}
+
+// Updates parsers based on the provided NNArchive
+void ParsingNeuralNetwork::updateParsers(const NNArchive& nnArchive) {
+    removeOldParserNodes();
+    parsers = getParserNodes(nnArchive);
+}
+
+// Removes previously created parser nodes and internal sync node from the pipeline
+void ParsingNeuralNetwork::removeOldParserNodes() {
+    for(const auto& entry : parsers) {
+        std::visit([this](auto& p) { getParentPipeline().remove(p); }, entry);
+    }
+    if(parsers.size() > 1) {
+        parserSync = std::nullopt;
+    }
+    parsers.clear();
+}
+
+void ParsingNeuralNetwork::run() {
+    DAI_CHECK(nn != nullptr, "ParsingNeuralNetwork run before NN was initialized (was this node built via `build()`?)");
+    DAI_CHECK(nn->getNNArchive() != std::nullopt, "ParsingNeuralNetwork run before the underlying NN was given an archive (was this node built via `build()`?)");
+}
+
+// Creates new parser nodes from NNArchive, links their input/output, and returns them
+std::vector<HostOrDeviceParser> ParsingNeuralNetwork::getParserNodes(const NNArchive& nnArchive) {
+    std::vector<HostOrDeviceParser> newParsers = ParserGenerator::generateAllParsers(getParentPipeline(), nnArchive);
+
+    if(auto& newParser = newParsers[0]; newParsers.size() == 1) {
+        std::visit(
+            [this](auto& p) {
+                nn->out.link(p->input);
+                out = p->out;
+            },
+            newParser);
+    } else {
+        auto sync = parserSync.value_or(Subnode<Sync>(*this, "sync"));
+        for(std::size_t idx = 0; idx < newParsers.size(); ++idx) {
+            std::visit(
+                [this, idx, sync](auto& p) {
+                    nn->out.link(p->input);
+                    p->out.link(sync->inputs[std::to_string(idx)]);
+                },
+                newParsers[idx]);
+        }
+        parserSync = sync;
+        out = sync->out;
+    }
+
+    return newParsers;
+}
+
+}  // namespace dai::node
diff --git a/src/pipeline/node/host/contrib/host_nodes_ext/messages/Keypoints.cpp b/src/pipeline/node/host/contrib/host_nodes_ext/messages/Keypoints.cpp
new file mode 100644
index 0000000000..d2a0f9aef4
--- /dev/null
+++ b/src/pipeline/node/host/contrib/host_nodes_ext/messages/Keypoints.cpp
@@ -0,0 +1,35 @@
+//
+// Created by thwdpc on 7/28/25.
+//
+
+#include "messages/Keypoints.hpp"
+
+#include <cstring>
+
+#include "utility/ErrorMacros.hpp"
+
+namespace dai {
+template <typename KP>
+Keypoints<KP>::Keypoints(std::shared_ptr<NNData>&& other, xt::xarray<float>&& planarStackedKeypoints) {
+    // KP#, dim
+    const size_t numKeypoints = planarStackedKeypoints.shape()[0], numDimsFound = planarStackedKeypoints.shape()[1];
+
+    DAI_CHECK_V(numDimsFound == KP::value,
+                "Trying to build {} dimensional keypoints, got {} sets of keypoints/confidence values",
+                KP::value,
+                numDimsFound);
+
+    kpVec = std::vector<KP>(numKeypoints);
+    // Direct copy into the vec
+    assert(sizeof(KP) == sizeof(float) * KP::value);
+    assert(planarStackedKeypoints.size() == numKeypoints * KP::value);
+    std::memcpy(kpVec.data(), planarStackedKeypoints.data(), planarStackedKeypoints.size() * sizeof(float));
+
+    transformation = other->transformation;
+    setTimestamp(other->getTimestamp());
+    setSequenceNum(other->sequenceNum);
+}
+
+
+
+}  // namespace dai
\ No newline at end of file
diff --git a/src/pipeline/node/host/contrib/host_nodes_ext/parsers/BaseParser.cpp b/src/pipeline/node/host/contrib/host_nodes_ext/parsers/BaseParser.cpp
new file mode 100644
index 0000000000..708c37a0b3
--- /dev/null
+++ b/src/pipeline/node/host/contrib/host_nodes_ext/parsers/BaseParser.cpp
@@ -0,0 +1,15 @@
+//
+// Created by thwdpc on 7/24/25.
+//
+
+#include "parsers/BaseParser.hpp"
+#include "utility/ErrorMacros.hpp"
+
+namespace dai::node {
+
+std::shared_ptr<BaseParser> BaseParser::build(const nn_archive::v1::Head& head, const nn_archive::v1::Model& model) {
+    DAI_CHECK_IN(head.parser == getName());
+    buildImpl(head, model);
+    return std::static_pointer_cast<BaseParser>(shared_from_this());
+}
+}  // namespace dai::node
\ No newline at end of file
diff --git a/src/pipeline/node/host/contrib/host_nodes_ext/parsers/KeypointParser.cpp b/src/pipeline/node/host/contrib/host_nodes_ext/parsers/KeypointParser.cpp
new file mode 100644
index 0000000000..73e4073c08
--- /dev/null
+++ b/src/pipeline/node/host/contrib/host_nodes_ext/parsers/KeypointParser.cpp
@@ -0,0 +1,65 @@
+//
+// Created by thwdpc on 7/25/25.
+//
+
+#include "parsers/KeypointParser.hpp"
+
+#include <algorithm>
+
+#include "utility/ErrorMacros.hpp"
+
+namespace dai::node {
+void KeypointParser::buildImpl(const nn_archive::v1::Head& head, const nn_archive::v1::Model& model) {
+    bool fallback = false;
+    if(const auto layers = head.metadata.keypointsOutputs) {
+        for(auto& layerName : *layers) {
+            auto output = std::find_if(model.outputs.begin(), model.outputs.end(), [&](const auto& o) { return o.name == layerName; });
+            DAI_CHECK_V(output != model.outputs.end(), "{}: keypoint output {} not found in model", getName(), layerName);
+            keypointsOutputs.push_back(*output);
+        }
+    } else {
+        spdlog::trace("KeypointParser (or subclass) did not receive keypoints_outputs, falling back to using all outputs");
+        for(auto& output : model.outputs) {
+            keypointsOutputs.push_back(output);
+        }
+        fallback = true;
+    }
+
+    const uint8_t ko_sz = keypointsOutputs.size();
+    if(ko_sz < 1 || ko_sz > 3) {
+        const std::string where = fallback ? "During fallback to use all outputs" : "Configured keypoints_outputs";
"During fallback to use all outputs" : "Configured keypoints_outputs"; + throw std::runtime_error(fmt::format("{w}: size {sz} must satisfy 1 <= {sz} <= 3 ", fmt::arg("w", where), fmt::arg("sz", ko_sz))); + } + + // take outputs size if it makes sense else default + switch(head.metadata.extraParams.value("values_per_keypoint", ko_sz > 1 ? ko_sz : static_cast(valuesPerKeypoint))) { + case 2: + valuesPerKeypoint = ValuesPerKeypoint::Two; + break; + case 3: + valuesPerKeypoint = ValuesPerKeypoint::Three; + break; + default: + DAI_CHECK_IN(false); + break; + } + + DAI_CHECK_V(ko_sz == 1 || ko_sz == static_cast(valuesPerKeypoint), + "Expected one output per keypoint dimension, or one output that contains all keypoints, got {} layers vs dimensionality {}.", + ko_sz, + static_cast(valuesPerKeypoint)); + + if(const auto n = head.metadata.nKeypoints) { + nKeypoints = *n; + } else { + spdlog::warn("SimCCKeypointParser did not receive n_keypoints, defaulting to standard COCO 17. Populating this field is strongly encouraged"); + } + + keypointNames = head.metadata.extraParams.value("keypoint_names", keypointNames); + skeletonEdges = head.metadata.extraParams.value("skeleton_edges", skeletonEdges); +} +void KeypointParser::run() { + assert(false); //TODO KeypointParser::run +} + +} // namespace dai::node \ No newline at end of file diff --git a/src/pipeline/node/host/contrib/host_nodes_ext/parsers/SimCCKeypointParser.cpp b/src/pipeline/node/host/contrib/host_nodes_ext/parsers/SimCCKeypointParser.cpp new file mode 100644 index 0000000000..22d17521b0 --- /dev/null +++ b/src/pipeline/node/host/contrib/host_nodes_ext/parsers/SimCCKeypointParser.cpp @@ -0,0 +1,209 @@ +#include "parsers/SimCCKeypointParser.hpp" + +#include "utility/ErrorMacros.hpp" +#include "depthai/nn_archive/v1/InputType.hpp" +#include "messages/Keypoints.hpp" +#include "spdlog/spdlog.h" +#include "xtensor/containers/xarray.hpp" +#include "xtensor/views/xindex_view.hpp" +#include "xtensor/misc/xsort.hpp" + +namespace dai::node { + +void SimCCKeypointParser::foggyGuessesForOneDim(const nn_archive::v1::Head& head, + const nn_archive::v1::Model& model, + const nn_archive::v1::Input& imgInput, + const std::pair, std::optional>& imgWHMaybe) { + std::pair imgWidthHeight; + if(imgWHMaybe.first && imgWHMaybe.second) { + imgWidthHeight = {static_cast(*imgWHMaybe.first), static_cast(*imgWHMaybe.second)}; + } else { + std::unordered_set seen; + for(int64_t n : imgInput.shape) { + if(seen.find(n) != seen.end()) { + spdlog::warn("Input layout not found, assuming duplicated dimension {} is width and height", n); + imgWidthHeight = {static_cast(n), static_cast(n)}; + break; + } + seen.insert(n); + } + } + + DAI_CHECK_V(imgWidthHeight.first && imgWidthHeight.first == imgWidthHeight.second, + "Input image width and height must match for a single output combined dim") + uint16_t inputImgDim = imgWidthHeight.first; + + simCCDimLengths = {static_cast(inputImgDim * pixelSubdivisions)}; + + if(auto shape = keypointsOutputs[0].shape) { + // N 1 C ( 2/3 ) * KPDims D = W = H + const int product = std::accumulate(shape->begin(), shape->end(), 1, std::multiplies()); + const int expected = nKeypoints * static_cast(valuesPerKeypoint) * (pixelSubdivisions * (inputImgDim * 2)); + // I don't know what to do, phone it in + DAI_CHECK_V(product == expected, "The developer of this project was not even sure a model like yours would exist, best efforts were made but alas...") + // Whether the keypoint # dim is separate from the keypoint XY(Z) dimension(yolo does this) + 
+        for(auto& dim : *shape) {
+            if(dim == nKeypoints * static_cast<int>(valuesPerKeypoint)) {
+                collapsedDimsAreInterleaved = head.metadata.extraParams.value("collapsed_dims_are_interleaved", false);
+                spdlog::trace("Observed output dim {} == {} * {}, assuming collapsed dim", dim, nKeypoints, static_cast<int>(valuesPerKeypoint));
+                break;
+            }
+        }
+    }
+}
+
+// Parse the outputs to find XY(Z) ordering, throwing if the output layout exists and "disagrees" with the order of the outputs in the config
+void SimCCKeypointParser::inferConfigFromMultipleOutputs(const nn_archive::v1::Head& head,
+                                                         const nn_archive::v1::Model& model,
+                                                         const nn_archive::v1::Input& imgInput,
+                                                         std::pair<std::optional<int64_t>, std::optional<int64_t>>& imgWidthHeight) {
+    if(const auto& [maybeWidth, maybeHeight] = imgWidthHeight; maybeWidth && maybeHeight) {
+        simCCDimLengths = {static_cast<std::size_t>(*maybeWidth * pixelSubdivisions), static_cast<std::size_t>(*maybeHeight * pixelSubdivisions)};
+        if(valuesPerKeypoint == ValuesPerKeypoint::Three) {
+            replicateXDimToZDim = head.metadata.extraParams.value("input_z_dim_equal_x_dim", replicateXDimToZDim);
+            simCCDimLengths.push_back(static_cast<std::size_t>(replicateXDimToZDim ? *maybeWidth * pixelSubdivisions : *maybeHeight * pixelSubdivisions));
+        }
+    } else {
+        throw std::runtime_error(fmt::format(
+            "Refusing to construct node {} without a type: image model.input with populated layout which contains W and H, too much guesswork involved.",
+            getName()));
+    }
+    // X == W || X == D, Y == H || Y == D, Z == D
+    std::vector<char> outputDimPx = {'w', 'h', 'd'};
+    bool anyMissingLayouts = false;
+    for(int i = 0; i < keypointsOutputs.size(); i++) {
+        if(auto& shape_maybe = keypointsOutputs[i].shape) {
+            std::vector<int64_t> shapeV = *shape_maybe;
+            DAI_CHECK_V(shapeV.back() == simCCDimLengths[i],
+                        "Expected output '{}' last dim {} == {} (pixel dimension from input * SimCC pixel subdivisions)",
+                        keypointsOutputs[i].name,
+                        shapeV.back(),
+                        simCCDimLengths[i]);
+        }
+        if(auto& layout = keypointsOutputs[i].layout) {
+            bool found = false;
+            for(auto& dim : *layout) {
+                char dimL = std::tolower(dim);
+                found |= dimL == outputDimPx[i] || dimL == 'd';
+            }
+            DAI_CHECK_V(found, "Output '{}' layout does not contain expected `{}` or wildcard `d` (case insensitive)", keypointsOutputs[i].name, outputDimPx[i])
+        } else {
+            anyMissingLayouts = true;
+        }
+    }
+    if(anyMissingLayouts) {
+        std::string assumptions = "";
+        std::vector<char> outputDims = {'X', 'Y', 'Z'};
+        for(int i = 0; i < keypointsOutputs.size(); i++) {
+            std::string assumedLayout = fmt::format("[{}, {} * {}]", nKeypoints, simCCDimLengths[i] / 2.0f, pixelSubdivisions);
+            assumptions += fmt::format("- Output {} is dimension: {}, with layout {}\n", keypointsOutputs[i].name, outputDims[i], assumedLayout);
+        }
+        spdlog::warn(
+            "One or more output layouts not found for keypoint outputs, it is highly encouraged to include output layouts, assuming/found the following: \n{}",
+            assumptions);
+    }
+}
+
+void SimCCKeypointParser::buildImpl(const nn_archive::v1::Head& head, const nn_archive::v1::Model& model) {
+    KeypointParser::buildImpl(head, model);
+    try {
+        pixelSubdivisions = head.metadata.extraParams["pixel_subdivisions"];
+    } catch(...) {
+        spdlog::warn("{}: pixel_subdivisions not found in metadata, using default value {}", getName(), pixelSubdivisions);
+    }
+
+    // Find the image input so we can educate our guesses
+    auto imgInput = std::find_if(model.inputs.begin(), model.inputs.end(), [](const auto& i) { return i.inputType == nn_archive::v1::InputType::IMAGE; });
+    DAI_CHECK_V(imgInput != model.inputs.end(), "Expecting image input to find pixel comparisons")
+
+    std::pair<std::optional<int64_t>, std::optional<int64_t>> imgWidthHeight;
+    if(const auto inputImgLayout = imgInput->layout) {
+        std::string layout = *inputImgLayout;
+        DAI_CHECK_V(imgInput->shape.size() == layout.size(), "Input shape and layout length mismatch")
+        for(int i = 0; i < inputImgLayout->size(); i++) {
+            if(std::tolower(layout[i]) == 'w') {
+                imgWidthHeight.first = imgInput->shape.at(i);
+            }
+            if(std::tolower(layout[i]) == 'h') {
+                imgWidthHeight.second = imgInput->shape.at(i);
+            }
+        }
+    }
+
+    if(keypointsOutputs.size() == 1) {
+        foggyGuessesForOneDim(head, model, *imgInput, imgWidthHeight);
+    } else {
+        inferConfigFromMultipleOutputs(head, model, *imgInput, imgWidthHeight);
+    }
+
+    // TODO don't make assumptions that last dim of output is simcc
+}
+
+void SimCCKeypointParser::run() {
+    while(isRunning()) {
+        // TODO not just my application
+        std::shared_ptr<Keypoints3D3C> outputMessage;
+        // Build a new nested scope to prevent use after move because I wish I was in rust
+        {
+            std::shared_ptr<NNData> result;
+            try {
+                result = this->input.get<NNData>();
+            } catch([[maybe_unused]] const MessageQueue::QueueException& e) {
+                break;
+            }
+            std::vector<xt::xarray<float>> foundLayers = {};
+            for(int i = 0; i < keypointsOutputs.size(); i++) {
+                std::string layerName = keypointsOutputs[i].name;
+                DAI_CHECK_V(result->hasLayer(layerName), "Expecting layer {} in NNData", layerName)
+                xt::xarray<float> tensor = result->getTensor<float>(layerName);
+                // TODO maybe it respects NNArchive and is {1, nKeypoints, simCCDimLengths[i]} ----- IT DOES
+                std::vector<std::size_t> shapeBatched = {1, nKeypoints, simCCDimLengths[i]};
+                std::vector<std::size_t> shapeUnbatched = {nKeypoints, simCCDimLengths[i]};
+                DAI_CHECK_V(tensor.shape() == shapeUnbatched || tensor.shape() == shapeBatched, "Expecting tensor {} to have shape [1(optional), {}, {}]", layerName, shapeUnbatched[0], shapeUnbatched[1])
+                xt::reshape_view(tensor, {nKeypoints, simCCDimLengths[i]});  // view as KP#, PX * splitRatio(pixelSubdivisions)
+                foundLayers.push_back(tensor);
+            }
+            // probably not needed but feels wrong not to
+            assert(foundLayers.size() == keypointsOutputs.size());
+
+            xt::xarray<float> output = xt::zeros<float>({static_cast<std::size_t>(nKeypoints), foundLayers.size() * 2});
+            for(int i = 0; i < foundLayers.size(); i++) {
+                xt::xarray<float> prediction = xt::reshape_view(foundLayers[i], {nKeypoints, simCCDimLengths[i]});
+                // TODO IF THIS IS SLOW https://github.com/xtensor-stack/xtensor/issues/2046
+                // locationsUnsplit: shape = [nKeypoints], integer indices
+                xt::xarray<std::size_t> locationsUnsplit = xt::flatten(xt::argmax(prediction, -1));
+                // Normalize
+                xt::xarray<float> locationsNormalized = locationsUnsplit / static_cast<float>(simCCDimLengths[i]);
+
+                // TODO XTensor not working as shown in docs, I'm done caring and just using xt::amax
+                // // Create indices shape [nKeypoints, 2], where kp index, px subdivision index
+                // xt::xarray<std::size_t> indices = xt::stack(xt::xtuple(xt::arange(nKeypoints), locationsUnsplit), 1);
+                // std::vector<std::size_t> expected{nKeypoints, 2};
+                // DAI_CHECK_IN(indices.shape() == expected);
+                // xt::xarray<int> a = {{11, 12, 13}, {24, 25, 26}};
+                // xt::xarray<std::size_t> idxs = {{0, 0}, {1, 0}, {0, 1}};
+                // auto b = xt::index_view(a, idxs);
+                // for(auto el : b) {
<< el << ", "; // Why does this print 11, 11, 12, 11, 11, 12, ?????? + // } + // std::cout << std::endl; + // xt::ravel_return_type_t>, xt::ravel_tensor_tag> flat_indices = + // xt::ravel_indices(xt::argwhere(a >= 6), a.shape()); + // // TODO why is this 266 not 133? + // auto confidence = xt::index_view(prediction, indices); + + auto confidence = xt::amax(prediction, -1); + + // Clamp + auto confidenceClamped = xt::clip(confidence, 0.0f, 1.0f); + + xt::view(output, xt::all(), 2 * i) = locationsNormalized; + xt::view(output, xt::all(), 2 * i + 1) = confidenceClamped; + } + outputMessage = std::make_shared(std::move(result), std::move(output)); + } + out.send(std::static_pointer_cast(outputMessage)); + } +} + +} // namespace dai::node \ No newline at end of file