@@ -399,141 +399,49 @@ int CISA_IR_Builder::AddFunction(VISAFunction *& function, const char* functionN
399
399
// default size of the physical reg pool mem manager in bytes
400
400
#define PHY_REG_MEM_SIZE (16 *1024 )
401
401
402
- struct FCallState
403
- {
404
- G4_INST* fcallInst;
405
- G4_Operand* opnd0;
406
- G4_Operand* opnd1;
407
- G4_BB* retBlock;
408
- unsigned int execSize;
409
- };
410
-
411
- struct SavedFCallStates
412
- {
413
- std::vector<std::pair<G4_Kernel*, FCallState>> states;
414
- std::vector<G4_BB*> retbbs;
415
- };
416
-
417
- void saveFCallState (G4_Kernel* kernel, SavedFCallStates& savedFCallState)
418
- {
419
- // Iterate over all BBs in kernel.
420
- // For each fcall seen, store its opnd0, opnd1, retBlock.
421
- // so that after compiling the copy of function for 1 kernel,
422
- // the IR can be reused for another kernel rather than
423
- // recompiling.
424
- // kernel points to a stackcall function.
425
- std::set<G4_BB*> calledFrets;
426
- for (auto curBB : kernel->fg )
427
- {
428
- if ( curBB->size () > 0 && curBB->isEndWithFCall () )
429
- {
430
- // Save state for this fcall
431
- G4_INST* fcallInst = curBB->back ();
432
-
433
- FCallState currFCallState;
434
-
435
- currFCallState.fcallInst = fcallInst;
436
- currFCallState.opnd0 = fcallInst->getSrc (0 );
437
- currFCallState.opnd1 = fcallInst->getSrc (1 );
438
- currFCallState.retBlock = curBB->Succs .front ();
439
- currFCallState.execSize = fcallInst->getExecSize ();
440
-
441
- savedFCallState.states .push_back ( std::make_pair ( kernel, currFCallState ) );
442
- calledFrets.insert (currFCallState.retBlock );
443
- }
444
- if (curBB->size () > 0 && curBB->isEndWithFRet () && !calledFrets.count (curBB))
445
- {
446
- savedFCallState.retbbs .push_back (curBB);
447
- }
448
- }
449
- }
450
-
451
- void restoreFCallState (G4_Kernel* kernel, SavedFCallStates savedFCallState)
402
+ void restoreFCallState (G4_Kernel* kernel, const std::map<G4_BB*, G4_INST*>& savedFCallState)
452
403
{
453
404
// Iterate over all BBs in kernel and fix all fcalls converted
454
405
// to calls by reconverting them to fcall. This is required
455
406
// because we want to reuse IR of function for next kernel.
456
407
457
- // start, end iterators denote boundaries in vector that correspond
458
- // to current kernel. This assumes that entries for different
459
- // functions are not interspersed.
460
- auto start = savedFCallState.states .begin (), end = savedFCallState.states .end ();
461
-
462
- for ( BB_LIST_ITER bb_it = kernel->fg .begin ();
463
- bb_it != kernel->fg .end ();
464
- bb_it++ )
408
+ for (auto && iter : savedFCallState)
465
409
{
466
- G4_BB* curBB = (*bb_it);
467
-
468
- if ( curBB->size () > 0 &&
469
- curBB->back ()->isCall () )
410
+ auto curBB = iter.first ;
411
+ curBB->pop_back ();
412
+ auto origInst = iter.second ;
413
+ assert (origInst->isFCall () || origInst->isFReturn ());
414
+ curBB->push_back (origInst);
415
+ if (origInst->isFCall () && !origInst->asCFInst ()->isIndirectCall ())
470
416
{
471
- // Check whether this call is a convert from fcall
472
- for ( auto state_it = start;
473
- state_it != end;
474
- state_it++ )
417
+ // curBB must have a physical successor as we don't allow calls that do not return
418
+ G4_BB* retBlock = curBB->getPhysicalSucc ();
419
+ G4_BB* retbbToConvert = retBlock->Preds .back ();
420
+ kernel->fg .removePredSuccEdges (retbbToConvert, retBlock);
421
+ // Remove edge between call and previously joined function
422
+ while (curBB->Succs .size () > 0 )
475
423
{
476
- if ( (*state_it).second .fcallInst == curBB->back () )
477
- {
478
- // Found a call to replace with fcall and ret with fret
479
-
480
- // Restore corresponding ret to fret
481
- G4_BB* retBlock = (*state_it).second .retBlock ;
482
-
483
- G4_BB* retbbToConvert = retBlock->Preds .back ();
484
-
485
- G4_INST* retToReplace = retbbToConvert->back ();
486
-
487
- retToReplace->asCFInst ()->retToFRet ();
488
- retToReplace->setDest (NULL );
489
-
490
- kernel->fg .removePredSuccEdges (retbbToConvert, retBlock);
491
-
492
- // Now restore call operands
493
- G4_INST* instToReplace = curBB->back ();
494
-
495
- auto & state = (*state_it).second ;
496
- instToReplace->setSrc (state.opnd0 , 0 );
497
- instToReplace->setSrc (state.opnd1 , 1 );
498
- instToReplace->setExecSize ((unsigned char )state.execSize );
499
-
500
- // Remove edge between call and previously joined function
501
- while ( curBB->Succs .size () > 0 )
502
- {
503
- kernel->fg .removePredSuccEdges ( curBB, curBB->Succs .front () );
504
- }
505
-
506
- // Restore edge to retBlock
507
- kernel->fg .addPredSuccEdges ( curBB, (*state_it).second .retBlock );
508
-
509
- instToReplace->asCFInst ()->callToFCall ();
510
- }
424
+ kernel->fg .removePredSuccEdges (curBB, curBB->Succs .front ());
511
425
}
512
- }
513
- }
514
-
515
- for (G4_BB* retBB : savedFCallState.retbbs )
516
- {
517
- G4_INST* retToReplace = retBB->back ();
518
-
519
- retToReplace->asCFInst ()->retToFRet ();
520
- retToReplace->setDest (NULL );
521
426
427
+ // Restore edge to retBlock
428
+ kernel->fg .addPredSuccEdges (curBB, retBlock);
429
+ }
522
430
}
523
431
524
432
// Remove all in-edges to stack call function. These may have been added
525
433
// to connect earlier kernels with the function.
526
- while ( kernel->fg .getEntryBB ()->Preds .size () > 0 )
434
+ while ( kernel->fg .getEntryBB ()->Preds .size () > 0 )
527
435
{
528
- kernel->fg .removePredSuccEdges ( kernel->fg .getEntryBB ()->Preds .front (), kernel->fg .getEntryBB () );
436
+ kernel->fg .removePredSuccEdges (kernel->fg .getEntryBB ()->Preds .front (), kernel->fg .getEntryBB ());
529
437
}
530
438
}
531
439
532
440
// Stitch the FG of subFunctions to mainFunc
533
441
// mainFunc could be a kernel or a non-kernel function.
534
442
// It also modifies pseudo_fcall/fret into call/ret opcodes.
535
443
// ToDo: may consider stitching only functions that may be called by this kernel/function
536
- static void Stitch_Compiled_Units (G4_Kernel* mainFunc, std::map<std::string, G4_Kernel*>& subFuncs)
444
+ static void Stitch_Compiled_Units (G4_Kernel* mainFunc, std::map<std::string, G4_Kernel*>& subFuncs, std::map<G4_BB*, G4_INST*>& FCallRetMap )
537
445
{
538
446
539
447
// Append subFunctions to mainFunc
@@ -549,11 +457,13 @@ static void Stitch_Compiled_Units(G4_Kernel* mainFunc, std::map<std::string, G4_
549
457
}
550
458
551
459
mainFunc->fg .reassignBlockIDs ();
460
+ mainFunc->fg .setPhysicalPredSucc (); // this is to locate the next BB after an fcall
552
461
462
+ auto builder = mainFunc->fg .builder ;
553
463
// Change fcall/fret to call/ret and setup caller/callee edges
554
464
for (G4_BB* cur : mainFunc->fg )
555
465
{
556
- if (cur->size () > 0 && cur-> isEndWithFCall ())
466
+ if (cur->isEndWithFCall ())
557
467
{
558
468
// Setup successor/predecessor
559
469
G4_INST* fcall = cur->back ();
@@ -565,6 +475,7 @@ static void Stitch_Compiled_Units(G4_Kernel* mainFunc, std::map<std::string, G4_
565
475
if (!fcall->asCFInst ()->isIndirectCall ())
566
476
{
567
477
// Setup caller/callee edges for direct call
478
+ // ToDo: remove this once SWSB is moved before stitching, as we would not need to maintain CFG otherwise
568
479
std::string funcName = fcall->getSrc (0 )->asLabel ()->getLabel ();
569
480
570
481
auto iter = subFuncs.find (funcName);
@@ -585,29 +496,34 @@ static void Stitch_Compiled_Units(G4_Kernel* mainFunc, std::map<std::string, G4_
585
496
G4_INST* calleeLabel = callee->fg .getEntryBB ()->front ();
586
497
ASSERT_USER (calleeLabel->isLabel () == true , " Entry inst is not label" );
587
498
588
- // ret/e-mask
589
- fcall->setSrc (fcall->getSrc (0 ), 1 );
590
-
591
- // dst label
592
- fcall->setSrc (calleeLabel->getSrc (0 ), 0 );
593
- fcall->asCFInst ()->pseudoCallToCall ();
499
+ auto callInst = builder->createInternalInst (fcall->getPredicate (), G4_call, nullptr , false , fcall->getExecSize (), fcall->getDst (),
500
+ calleeLabel->getSrc (0 ), fcall->getSrc (0 ), fcall->getOption ());
501
+ cur->pop_back ();
502
+ cur->push_back (callInst);
594
503
}
595
504
else
596
505
{
597
- fcall->setSrc (fcall->getSrc (0 ), 1 );
598
- fcall->asCFInst ()->pseudoCallToCall ();
506
+ // src0 is a don't-care for an indirect call as long as it's not a label
507
+ auto callInst = builder->createInternalInst (fcall->getPredicate (), G4_call, nullptr , false , fcall->getExecSize (), fcall->getDst (),
508
+ fcall->getSrc (0 ), fcall->getSrc (0 ), fcall->getOption ());
509
+ cur->pop_back ();
510
+ cur->push_back (callInst);
599
511
}
512
+ FCallRetMap[cur] = fcall;
600
513
}
601
514
}
602
515
603
516
// Change fret to ret
604
517
for (G4_BB* cur : mainFunc->fg )
605
518
{
606
- if ( cur-> size () > 0 && cur->isEndWithFRet () )
519
+ if ( cur->isEndWithFRet ())
607
520
{
608
521
G4_INST* fret = cur->back ();
609
- fret->asCFInst ()->pseudoRetToRet ();
610
- fret->setDest ( mainFunc->fg .builder ->createNullDst (Type_UD) );
522
+ auto retInst = builder->createInternalInst (fret->getPredicate (), G4_return, nullptr , false , fret->getExecSize (), builder->createNullDst (Type_UD),
523
+ fret->getSrc (0 ), fret->getSrc (1 ), fret->getOption ());
524
+ cur->pop_back ();
525
+ cur->push_back (retInst);
526
+ FCallRetMap[cur] = fret;
611
527
}
612
528
}
613
529
@@ -850,12 +766,6 @@ int CISA_IR_Builder::Compile(const char* nameInput, std::ostream* os, bool emit_
850
766
}
851
767
}
852
768
853
- SavedFCallStates savedFCallState;
854
- for (auto func : m_kernelsAndFunctions)
855
- {
856
- saveFCallState (func->getKernel (), savedFCallState);
857
- }
858
-
859
769
// Preparing for stitching some functions to other functions
860
770
// There are two stitching policies:
861
771
// 1. vISA_noStitchExternFunc == false
@@ -878,7 +788,7 @@ int CISA_IR_Builder::Compile(const char* nameInput, std::ostream* os, bool emit_
878
788
continue ;
879
789
} else {
880
790
if (!m_options.getOption (vISA_noStitchExternFunc)) {
881
- // Policy 1: all fnunctions will stitch to kernels
791
+ // Policy 1: all functions will stitch to kernels
882
792
subFunctions.push_back (func);
883
793
subFunctionsNameMap[std::string (func->getName ())] = func->getKernel ();
884
794
} else {
@@ -911,7 +821,9 @@ int CISA_IR_Builder::Compile(const char* nameInput, std::ostream* os, bool emit_
911
821
{
912
822
unsigned int genxBufferSize = 0 ;
913
823
914
- Stitch_Compiled_Units (func->getKernel (), subFunctionsNameMap);
824
+ // store the BBs with FCall and FRet, which must terminate the BB
825
+ std::map<G4_BB*, G4_INST*> origFCallFRet;
826
+ Stitch_Compiled_Units (func->getKernel (), subFunctionsNameMap, origFCallFRet);
915
827
916
828
void * genxBuffer = func->compilePostOptimize (genxBufferSize);
917
829
func->setGenxBinaryBuffer (genxBuffer, genxBufferSize);
@@ -921,7 +833,7 @@ int CISA_IR_Builder::Compile(const char* nameInput, std::ostream* os, bool emit_
921
833
func->computeAndEmitDebugInfo (subFunctions);
922
834
}
923
835
924
- restoreFCallState (func->getKernel (), savedFCallState );
836
+ restoreFCallState (func->getKernel (), origFCallFRet );
925
837
926
838
927
839
}
0 commit comments