Skip to content

Commit cda4b7d

Browse files
committed
SWDEV-475341 - Fix stream resolution for graphs launches
This issue was happening because of incorrect usage of getStream call, if we get the null stream first and then typecast it, and call on getStream again, we lose the advantage of simply passing "nullptr" to indicate NULL stream. Thus we enter the waitActiveStream call and add barriers to sync across streams. Change-Id: I94dc4e3ec927295b9e1ab6dee4b37d7d3e00b0cc
1 parent 4fbd7ab commit cda4b7d

File tree

2 files changed

+39
-43
lines changed

2 files changed

+39
-43
lines changed

hipamd/src/hip_graph_internal.cpp

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,7 @@ hipError_t EnqueueGraphWithSingleList(std::vector<hip::Node>& topoOrder, hip::St
561561
} else {
562562
topoOrder[i]->SetStream(hip_stream, graphExec);
563563
status = topoOrder[i]->CreateCommand(topoOrder[i]->GetQueue());
564-
topoOrder[i]->EnqueueCommands(reinterpret_cast<hipStream_t>(hip_stream));
564+
topoOrder[i]->EnqueueCommands(hip_stream);
565565
}
566566
}
567567

@@ -572,17 +572,14 @@ hipError_t EnqueueGraphWithSingleList(std::vector<hip::Node>& topoOrder, hip::St
572572
return status;
573573
}
574574

575-
hipError_t GraphExec::Run(hipStream_t stream) {
575+
hipError_t GraphExec::Run(hipStream_t graph_launch_stream) {
576576
hipError_t status = hipSuccess;
577577

578-
if (hip::getStream(stream) == nullptr) {
579-
return hipErrorInvalidResourceHandle;
580-
}
581-
auto hip_stream = (stream == nullptr) ? hip::getCurrentDevice()->NullStream()
582-
: reinterpret_cast<hip::Stream*>(stream);
578+
hip::Stream* launch_stream = hip::getStream(graph_launch_stream);
579+
583580
if (flags_ & hipGraphInstantiateFlagAutoFreeOnLaunch) {
584581
if (!topoOrder_.empty()) {
585-
topoOrder_[0]->GetParentGraph()->FreeAllMemory(hip_stream);
582+
topoOrder_[0]->GetParentGraph()->FreeAllMemory(launch_stream);
586583
}
587584
}
588585

@@ -599,31 +596,31 @@ hipError_t GraphExec::Run(hipStream_t stream) {
599596
}
600597

601598
if (parallelLists_.size() == 1 &&
602-
instantiateDeviceId_ == hip_stream->DeviceId()) {
599+
instantiateDeviceId_ == launch_stream->DeviceId()) {
603600
if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
604601
// If the graph has kernels that does device side allocation, during packet capture, heap is
605602
// allocated because heap pointer has to be added to the AQL packet, and initialized during
606603
// graph launch.
607604
static bool initialized = false;
608605
if (!initialized && HasHiddenHeap()) {
609-
hip_stream->vdev()->HiddenHeapInit();
606+
launch_stream->vdev()->HiddenHeapInit();
610607
initialized = true;
611608
}
612609
}
613-
status = EnqueueGraphWithSingleList(topoOrder_, hip_stream, this);
610+
status = EnqueueGraphWithSingleList(topoOrder_, launch_stream, this);
614611
} else if (parallelLists_.size() == 1 &&
615-
instantiateDeviceId_ != hip_stream->DeviceId()) {
612+
instantiateDeviceId_ != launch_stream->DeviceId()) {
616613
for (int i = 0; i < topoOrder_.size(); i++) {
617-
topoOrder_[i]->SetStream(hip_stream, this);
614+
topoOrder_[i]->SetStream(launch_stream, this);
618615
status = topoOrder_[i]->CreateCommand(topoOrder_[i]->GetQueue());
619-
topoOrder_[i]->EnqueueCommands(stream);
616+
topoOrder_[i]->EnqueueCommands(launch_stream);
620617
}
621618
} else {
622-
UpdateStream(parallelLists_, hip_stream, this);
619+
UpdateStream(parallelLists_, launch_stream, this);
623620
amd::Command* rootCommand = nullptr;
624621
amd::Command* endCommand = nullptr;
625622
status = FillCommands(parallelLists_, nodeWaitLists_, topoOrder_, clonedGraph_, rootCommand,
626-
endCommand, hip_stream);
623+
endCommand, launch_stream);
627624
if (status != hipSuccess) {
628625
return status;
629626
}
@@ -632,15 +629,15 @@ hipError_t GraphExec::Run(hipStream_t stream) {
632629
rootCommand->release();
633630
}
634631
for (int i = 0; i < topoOrder_.size(); i++) {
635-
topoOrder_[i]->EnqueueCommands(reinterpret_cast<hipStream_t>(topoOrder_[i]->GetQueue()));
632+
topoOrder_[i]->EnqueueCommands(topoOrder_[i]->GetQueue());
636633
}
637634
if (endCommand != nullptr) {
638635
endCommand->enqueue();
639636
endCommand->release();
640637
}
641638
}
642639
amd::ScopedLock lock(GraphExecStatusLock_);
643-
GraphExecStatus_[this] = std::make_pair(hip_stream, false);
640+
GraphExecStatus_[this] = std::make_pair(launch_stream, false);
644641
ResetQueueIndex();
645642
return status;
646643
}

hipamd/src/hip_graph_internal.hpp

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ struct GraphNode : public hipGraphNodeDOTAttribute {
349349
}
350350
virtual hipError_t GetNumParallelStreams(size_t &num) { return hipSuccess; }
351351
/// Enqueue commands part of the node
352-
virtual void EnqueueCommands(hipStream_t stream) {
352+
virtual void EnqueueCommands(hip::Stream* stream) {
353353
// If the node is disabled it becomes empty node. To maintain ordering just enqueue marker.
354354
// Node can be enabled/disabled only for kernel, memcpy and memset nodes.
355355
if (!isEnabled_ &&
@@ -359,8 +359,7 @@ struct GraphNode : public hipGraphNodeDOTAttribute {
359359
if (!commands_.empty()) {
360360
waitList = commands_[0]->eventWaitList();
361361
}
362-
hip::Stream* hip_stream = hip::getStream(stream);
363-
amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, waitList);
362+
amd::Command* command = new amd::Marker(*stream, !kMarkerDisableFlush, waitList);
364363
command->enqueue();
365364
command->release();
366365
return;
@@ -811,10 +810,10 @@ struct ChildGraphNode : public GraphNode {
811810
bool TopologicalOrder(std::vector<Node>& TopoOrder) override {
812811
return childGraph_->TopologicalOrder(TopoOrder);
813812
}
814-
void EnqueueCommands(hipStream_t stream) override {
813+
void EnqueueCommands(hip::Stream* stream) override {
815814
if (graphCaptureStatus_) {
816815
hipError_t status =
817-
EnqueueGraphWithSingleList(childGraphNodeOrder_, reinterpret_cast<hip::Stream*>(stream));
816+
EnqueueGraphWithSingleList(childGraphNodeOrder_, stream);
818817
} else {
819818
// enqueue child graph start command
820819
if (startCommand_ != nullptr) {
@@ -869,16 +868,15 @@ class GraphKernelNode : public GraphNode {
869868

870869
public:
871870
bool HasHiddenHeap() const { return hasHiddenHeap_; }
872-
void EnqueueCommands(hipStream_t stream) override {
871+
void EnqueueCommands(hip::Stream* stream) override {
873872
// If the node is disabled it becomes empty node. To maintain ordering just enqueue marker.
874873
// Node can be enabled/disabled only for kernel, memcpy and memset nodes.
875874
if (!isEnabled_) {
876875
amd::Command::EventWaitList waitList;
877876
if (!commands_.empty()) {
878877
waitList = commands_[0]->eventWaitList();
879878
}
880-
hip::Stream* hip_stream = hip::getStream(stream);
881-
amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, waitList);
879+
amd::Command* command = new amd::Marker(*stream, !kMarkerDisableFlush, waitList);
882880
command->enqueue();
883881
command->release();
884882
return;
@@ -892,6 +890,7 @@ class GraphKernelNode : public GraphNode {
892890
command->release();
893891
}
894892
}
893+
895894
void PrintAttributes(std::ostream& out, hipGraphDebugDotFlags flag) override {
896895
out << "[";
897896
out << "style";
@@ -1316,12 +1315,12 @@ class GraphMemcpyNode : public GraphNode {
13161315
return status;
13171316
}
13181317

1319-
virtual void EnqueueCommands(hipStream_t stream) override {
1318+
virtual void EnqueueCommands(hip::Stream* stream) override {
13201319
if ( (copyParams_.kind == hipMemcpyHostToHost || copyParams_.kind == hipMemcpyDefault) &&
13211320
isEnabled_ && IsHtoHMemcpy(copyParams_.dstPtr.ptr, copyParams_.srcPtr.ptr)) {
13221321
ihipHtoHMemcpy(copyParams_.dstPtr.ptr, copyParams_.srcPtr.ptr,
13231322
copyParams_.extent.width * copyParams_.extent.height *
1324-
copyParams_.extent.depth, *hip::getStream(stream));
1323+
copyParams_.extent.depth, *stream);
13251324
return;
13261325
}
13271326
GraphNode::EnqueueCommands(stream);
@@ -1470,7 +1469,7 @@ class GraphMemcpyNode1D : public GraphMemcpyNode {
14701469
return status;
14711470
}
14721471

1473-
virtual void EnqueueCommands(hipStream_t stream) override {
1472+
virtual void EnqueueCommands(hip::Stream* stream) override {
14741473
bool isH2H = false;
14751474
if ((kind_ == hipMemcpyHostToHost || kind_ == hipMemcpyDefault) && IsHtoHMemcpy(dst_, src_)) {
14761475
isH2H = true;
@@ -1483,22 +1482,21 @@ class GraphMemcpyNode1D : public GraphMemcpyNode {
14831482
if (isEnabled_) {
14841483
//HtoH
14851484
if (isH2H) {
1486-
ihipHtoHMemcpy(dst_, src_, count_, *hip::getStream(stream));
1485+
ihipHtoHMemcpy(dst_, src_, count_, *stream);
14871486
return;
14881487
}
14891488
amd::Command* command = commands_[0];
14901489
amd::HostQueue* cmdQueue = command->queue();
1491-
hip::Stream* hip_stream = hip::getStream(stream);
14921490

1493-
if (cmdQueue == hip_stream) {
1491+
if (cmdQueue == stream) {
14941492
command->enqueue();
14951493
command->release();
14961494
return;
14971495
}
14981496

14991497
amd::Command::EventWaitList waitList;
15001498
amd::Command* depdentMarker = nullptr;
1501-
amd::Command* cmd = hip_stream->getLastQueuedCommand(true);
1499+
amd::Command* cmd = stream->getLastQueuedCommand(true);
15021500
if (cmd != nullptr) {
15031501
waitList.push_back(cmd);
15041502
amd::Command* depdentMarker = new amd::Marker(*cmdQueue, true, waitList);
@@ -1515,7 +1513,7 @@ class GraphMemcpyNode1D : public GraphMemcpyNode {
15151513
if (cmd != nullptr) {
15161514
waitList.clear();
15171515
waitList.push_back(cmd);
1518-
amd::Command* depdentMarker = new amd::Marker(*hip_stream, true, waitList);
1516+
amd::Command* depdentMarker = new amd::Marker(*stream, true, waitList);
15191517
if (depdentMarker != nullptr) {
15201518
depdentMarker->enqueue(); // Make sure future commands of queue synced with command
15211519
depdentMarker->release();
@@ -1524,8 +1522,7 @@ class GraphMemcpyNode1D : public GraphMemcpyNode {
15241522
}
15251523
} else {
15261524
amd::Command::EventWaitList waitList;
1527-
hip::Stream* hip_stream = hip::getStream(stream);
1528-
amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, waitList);
1525+
amd::Command* command = new amd::Marker(*stream, !kMarkerDisableFlush, waitList);
15291526
command->enqueue();
15301527
command->release();
15311528
}
@@ -2005,11 +2002,12 @@ class GraphEventRecordNode : public GraphNode {
20052002
return status;
20062003
}
20072004

2008-
void EnqueueCommands(hipStream_t stream) override {
2005+
void EnqueueCommands(hip::Stream* stream) override {
20092006
if (!commands_.empty()) {
20102007
hip::Event* e = reinterpret_cast<hip::Event*>(event_);
20112008
// command release during enqueueRecordCommand
2012-
hipError_t status = e->enqueueRecordCommand(stream, commands_[0], true);
2009+
hipError_t status = e->enqueueRecordCommand(
2010+
reinterpret_cast<hipStream_t>(stream), commands_[0], true);
20132011
if (status != hipSuccess) {
20142012
ClPrint(amd::LOG_ERROR, amd::LOG_CODE,
20152013
"[hipGraph] Enqueue event record command failed for node %p - status %d", this,
@@ -2058,10 +2056,11 @@ class GraphEventWaitNode : public GraphNode {
20582056
return status;
20592057
}
20602058

2061-
void EnqueueCommands(hipStream_t stream) override {
2059+
void EnqueueCommands(hip::Stream* stream) override {
20622060
if (!commands_.empty()) {
20632061
hip::Event* e = reinterpret_cast<hip::Event*>(event_);
2064-
hipError_t status = e->enqueueStreamWaitCommand(stream, commands_[0]);
2062+
hipError_t status =
2063+
e->enqueueStreamWaitCommand(reinterpret_cast<hipStream_t>(stream), commands_[0]);
20652064
if (status != hipSuccess) {
20662065
ClPrint(amd::LOG_ERROR, amd::LOG_CODE,
20672066
"[hipGraph] Enqueue stream wait command failed for node %p - status %d", this,
@@ -2119,7 +2118,7 @@ class GraphHostNode : public GraphNode {
21192118
NodeParams->fn(NodeParams->userData);
21202119
}
21212120

2122-
void EnqueueCommands(hipStream_t stream) override {
2121+
void EnqueueCommands(hip::Stream* stream) override {
21232122
if (!commands_.empty()) {
21242123
if (!commands_[0]->setCallback(CL_COMPLETE, GraphHostNode::Callback, &NodeParams_)) {
21252124
ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] Failed during setCallback");
@@ -2443,7 +2442,7 @@ class GraphDrvMemcpyNode : public GraphNode {
24432442
return status;
24442443
}
24452444

2446-
void EnqueueCommands(hipStream_t stream) override {
2445+
void EnqueueCommands(hip::Stream* stream) override {
24472446
bool isHtoH = false;
24482447
if(copyParams_.srcMemoryType == hipMemoryTypeHost &&
24492448
copyParams_.dstMemoryType == hipMemoryTypeHost &&
@@ -2453,7 +2452,7 @@ class GraphDrvMemcpyNode : public GraphNode {
24532452
if (isEnabled_ && isHtoH) {
24542453
ihipHtoHMemcpy(copyParams_.dstHost, copyParams_.srcHost,
24552454
copyParams_.WidthInBytes * copyParams_.Height *
2456-
copyParams_.Depth, *hip::getStream(stream));
2455+
copyParams_.Depth, *stream);
24572456
return;
24582457
}
24592458
GraphNode::EnqueueCommands(stream);

0 commit comments

Comments
 (0)