diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 7dd6a2f963..01bc380fcb 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -1,83 +1,83 @@ -name: C/C++ CI - -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] - workflow_dispatch: - -jobs: - debug_builds: - runs-on: ubuntu-latest - strategy: - matrix: - folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum ] - fail-fast: false - steps: - - uses: actions/checkout@v2 - - name: make debug - run: make -C ${{ matrix.folder }} debug - CPU: - runs-on: ubuntu-latest - strategy: - matrix: - folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum , epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg ] - precision: [ d , f , m ] - backend: [ cppnone, cppauto ] - fail-fast: false - steps: - - uses: actions/checkout@v2 - - name: github PR info - run: date; echo github.event.pull_request.head.sha='${{ github.event.pull_request.head.sha }}' - - name: make info - run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info - - name: make - run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} - - name: make test - run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk test - CPU_MAC: - runs-on: macos-latest - env: - FC: gfortran-14 # see #971 - strategy: - matrix: - folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum, epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg ] - precision: [ d , f , m ] - backend: [ cppnone, cppsse4 ] - fail-fast: false - steps: - - uses: actions/checkout@v2 - - name: github PR info - run: date; echo github.event.pull_request.head.sha='${{ github.event.pull_request.head.sha }}' - - name: make info - run: make BACKEND=${{ matrix.backend }} OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info - - name: make - run: 
make BACKEND=${{ matrix.backend }} OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} - - name: make test - run: make BACKEND=${{ matrix.backend }} OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk test - GPU: - runs-on: self-hosted - # runs-on: madgraph5-h100 - # container: registry.cern.ch/ngt/lxplus-like:9 - env: - CUDA_HOME: /usr/local/cuda/ - FC: gfortran - strategy: - matrix: - folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum , epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg ] - precision: [ d , f , m ] - backend: [ cppauto, cuda ] - fail-fast: false - steps: - - uses: actions/checkout@v2 - - name: path - run: echo "PATH=$PATH" - - name: github PR info - run: date; echo github.event.pull_request.head.sha='${{ github.event.pull_request.head.sha }}' - - name: make info - run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info - - name: make - run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} - - name: make test - run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk test +# name: C/C++ CI +# +# on: +# push: +# branches: [ master ] +# pull_request: +# branches: [ master ] +# workflow_dispatch: +# +# jobs: +# debug_builds: +# runs-on: ubuntu-latest +# strategy: +# matrix: +# folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum ] +# fail-fast: false +# steps: +# - uses: actions/checkout@v2 +# - name: make debug +# run: make -C ${{ matrix.folder }} debug +# CPU: +# runs-on: ubuntu-latest +# strategy: +# matrix: +# folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum , epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg ] +# precision: [ d , f , m ] +# backend: [ cppnone, cppauto ] +# fail-fast: false +# steps: +# - uses: actions/checkout@v2 +# - name: github PR info +# run: date; echo 
github.event.pull_request.head.sha='${{ github.event.pull_request.head.sha }}' +# - name: make info +# run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info +# - name: make +# run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} +# - name: make test +# run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk test +# CPU_MAC: +# runs-on: macos-latest +# env: +# FC: gfortran-14 # see #971 +# strategy: +# matrix: +# folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum, epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg ] +# precision: [ d , f , m ] +# backend: [ cppnone, cppsse4 ] +# fail-fast: false +# steps: +# - uses: actions/checkout@v2 +# - name: github PR info +# run: date; echo github.event.pull_request.head.sha='${{ github.event.pull_request.head.sha }}' +# - name: make info +# run: make BACKEND=${{ matrix.backend }} OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info +# - name: make +# run: make BACKEND=${{ matrix.backend }} OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} +# - name: make test +# run: make BACKEND=${{ matrix.backend }} OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk test +# GPU: +# runs-on: self-hosted +# # runs-on: madgraph5-h100 +# # container: registry.cern.ch/ngt/lxplus-like:9 +# env: +# CUDA_HOME: /usr/local/cuda/ +# FC: gfortran +# strategy: +# matrix: +# folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum , epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg ] +# precision: [ d , f , m ] +# backend: [ cppauto, cuda ] +# fail-fast: false +# steps: +# - uses: actions/checkout@v2 +# - name: path +# run: echo "PATH=$PATH" +# - name: github PR info +# run: date; echo github.event.pull_request.head.sha='${{ github.event.pull_request.head.sha }}' +# - name: make info +# run: make 
BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info +# - name: make +# run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} +# - name: make test +# run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk test diff --git a/.github/workflows/testsuite_oneprocess.sh b/.github/workflows/testsuite_oneprocess.sh index 624f32ba09..69a243d96a 100755 --- a/.github/workflows/testsuite_oneprocess.sh +++ b/.github/workflows/testsuite_oneprocess.sh @@ -165,7 +165,15 @@ function build() { make -f cudacpp.mk gtestlibs fi # NB: 'make bldall' internally checks if 'which nvcc' and 'which hipcc' succeed before attempting to build cuda and hip - make -j bldall + if [ "${proc##*.}" == "sa" ]; then + # for standalone, just use the makefile (symlinked to cudacpp.mk) + rm cudacpp_overlay.mk + ls -l + make -j bldall + else + # makefile overlay after removing patches + make -j -f makefile -f cudacpp_overlay.mk bldall + fi popd >& /dev/null done } @@ -268,7 +276,13 @@ function tput_test() { ECHO echo "DEBUG: execute tests in directory ${bdir}" if [ ! -f ${bdir}/runTest_${suffix}.exe ]; then echo "ERROR! ${bdir}/runTest_${suffix}.exe not found?"; exit 1; fi + # + echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" + ldd ${bdir}/runTest_${suffix}.exe + LD_DEBUG=libs ${bdir}/runTest_${suffix}.exe 2>&1 | head -n 80 + command -v objdump >/dev/null && objdump -d "${bdir}/runTest_${suffix}.exe" | head -n 60 || true runExe ${bdir}/runTest_${suffix}.exe + # if [ ! -f ${bdir}/check_${suffix}.exe ]; then echo "ERROR! 
${bdir}/check_${suffix}.exe not found?"; exit 1; fi runExe ${bdir}/check_${suffix}.exe -p 1 32 1 done diff --git a/.github/workflows/testsuite_oneprocess.yml b/.github/workflows/testsuite_oneprocess.yml index a43fe7939a..b1659a1c36 100644 --- a/.github/workflows/testsuite_oneprocess.yml +++ b/.github/workflows/testsuite_oneprocess.yml @@ -236,6 +236,20 @@ jobs: buildcache-${{ runner.os }}-${{ inputs.process }}-${{ matrix.fptype }}-${{ steps.split.outputs.prnum }} buildcache-${{ runner.os }}-${{ inputs.process }}-${{ matrix.fptype }} + - name: Runner CPU info + run: | + uname -a + lscpu || true + cat /proc/cpuinfo | sed -n '1,20p' || true + echo "CC: $CC"; $CC --version || true + echo "CXX: $CXX"; $CXX --version || true + echo "FC: $FC"; $FC --version || true + + - name: Enable core dumps + run: | + ulimit -c unlimited + echo 'core.%e.%p' | sudo tee /proc/sys/kernel/core_pattern + - name: before_build run: .github/workflows/testsuite_oneprocess.sh before_build ${{ inputs.process }} @@ -257,7 +271,39 @@ jobs: key: buildcache-${{ runner.os }}-${{ inputs.process }}-${{ matrix.fptype }}-${{ steps.split.outputs.prnum }}-${{ github.run_id }} - name: tput_test - run: .github/workflows/testsuite_oneprocess.sh tput_test ${{ inputs.process }} + #run: .github/workflows/testsuite_oneprocess.sh tput_test ${{ inputs.process }} + run: | + set -euo pipefail + ulimit -c unlimited + .github/workflows/testsuite_oneprocess.sh tput_test ${{ inputs.process }} + + - name: If crash, print SIGILL site + if: always() + run: | + shopt -s nullglob + sudo apt-get update + sudo apt-get install -y gdb # -y: never wait for a confirmation prompt on a non-interactive runner + for c in $(find . 
-name 'core.*' -maxdepth 6); do + exe=$(echo "$c" | sed -n 's/.*core\.\(.*\)\.[0-9]\+/\1/p') + echo "---- $c (exe guess: $exe) ----" + for ff in f d m; do + exe_full_path="${c%/*}/build.none_${ff}_inl0_hrd0/$exe" + if [ -f "$exe_full_path" ]; then echo "OK"; else continue; fi + echo "" + echo "===============" + echo "$exe_full_path" + echo "===============" + command -v gdb >/dev/null && gdb -batch -q "$exe_full_path" "$c" -ex 'info reg' -ex 'bt' || true + gdb -batch -q "$exe_full_path" "$c" -ex 'info files' -ex 'bt' -ex 'x/12i $rip' + echo "" + command -v objdump >/dev/null && objdump -d "$exe_full_path" | head -n 60 || true + echo "" + objdump -d "$exe_full_path" | grep -nE '\bzmm|k[0-7]\b|evex' + # or a broader signature: + objdump -d "$exe_full_path" | grep -nE 'v[a-z].*zmm|k[0-7]' + echo "" + done + done - name: tmad_test run: .github/workflows/testsuite_oneprocess.sh tmad_test ${{ inputs.process }} diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index 3c332e08bc..0e967a68c1 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit 3c332e08bcaf2b902b67f5c5948601b14891aad2 +Subproject commit 0e967a68c19771919d0131ec0e9120b8541a79ba diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 deleted file mode 100644 index 1c5e505267..0000000000 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +++ /dev/null @@ -1,152 +0,0 @@ -diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f -index af6d02998..ec5722702 100644 ---- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f -+++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f -@@ -76,7 +76,15 @@ c common/to_colstats/ncols,ncolflow,ncolalt,ic - - include 'coupl.inc' ! 
needs VECSIZE_MEMMAX (defined in vector.inc) - INTEGER VECSIZE_USED -- DATA VECSIZE_USED/VECSIZE_MEMMAX/ ! can be changed at runtime -+ -+ character*255 env_name, env_value -+ integer env_length, env_status -+ -+#ifdef MG5AMC_MEEXPORTER_CUDACPP -+ INCLUDE 'fbridge.inc' -+c INCLUDE 'fbridge_common.inc' -+#endif -+ INCLUDE 'fbridge_common.inc' - - C----- - C BEGIN CODE -@@ -84,6 +92,61 @@ C----- - call cpu_time(t_before) - CUMULATED_TIMING = t_before - -+#ifdef _OPENMP -+ CALL OMPNUMTHREADS_NOT_SET_MEANS_ONE_THREAD() -+#endif -+ CALL COUNTERS_INITIALISE() -+ -+#ifdef MG5AMC_MEEXPORTER_CUDACPP -+ fbridge_mode = 1 ! CppOnly=1, default for CUDACPP -+#else -+ fbridge_mode = 0 ! FortranOnly=0, default for FORTRAN -+#endif -+ env_name = 'CUDACPP_RUNTIME_FBRIDGEMODE' -+ call get_environment_variable(env_name, env_value, env_length, env_status) -+ if( env_status.eq.0 ) then -+ write(*,*) 'Found environment variable "', trim(env_name), '" with value "', trim(env_value), '"' -+ read(env_value,'(I255)') FBRIDGE_MODE ! see https://gcc.gnu.org/onlinedocs/gfortran/ICHAR.html -+ write(*,*) 'FBRIDGE_MODE (from env) = ', FBRIDGE_MODE -+ else if( env_status.eq.1 ) then ! 1 = not defined -+ write(*,*) 'FBRIDGE_MODE (default) = ', FBRIDGE_MODE -+ else ! -1 = too long for env_value, 2 = not supported by O/S -+ write(*,*) 'ERROR! get_environment_variable failed for "', trim(env_name), '"' -+ STOP -+ endif -+#ifndef MG5AMC_MEEXPORTER_CUDACPP -+ if( fbridge_mode.ne.0 ) then -+ write(*,*) 'ERROR! Invalid fbridge_mode (in FORTRAN backend mode) = ', fbridge_mode -+ STOP -+ endif -+#endif -+ -+ vecsize_used = vecsize_memmax ! default ! CppOnly=1, default for CUDACPP -+ env_name = 'CUDACPP_RUNTIME_VECSIZEUSED' -+ call get_environment_variable(env_name, env_value, env_length, env_status) -+ if( env_status.eq.0 ) then -+ write(*,*) 'Found environment variable "', trim(env_name), '" with value "', trim(env_value), '"' -+ read(env_value,'(I255)') VECSIZE_USED ! 
see https://gcc.gnu.org/onlinedocs/gfortran/ICHAR.html -+ write(*,*) 'VECSIZE_USED (from env) = ', VECSIZE_USED -+ else if( env_status.eq.1 ) then ! 1 = not defined -+ write(*,*) 'VECSIZE_USED (default) = ', VECSIZE_USED -+ else ! -1 = too long for env_value, 2 = not supported by O/S -+ write(*,*) 'ERROR! get_environment_variable failed for "', trim(env_name), '"' -+ STOP -+ endif -+ if( VECSIZE_USED.gt.VECSIZE_MEMMAX .or. VECSIZE_USED.le.0 ) then -+ write(*,*) 'ERROR! Invalid VECSIZE_USED = ', VECSIZE_USED -+ STOP -+ endif -+ -+#ifdef MG5AMC_MEEXPORTER_CUDACPP -+ CALL FBRIDGECREATE(FBRIDGE_PBRIDGE, VECSIZE_USED, NEXTERNAL, 4) ! this must be at the beginning as it initialises the CUDA device -+ FBRIDGE_NCBYF1 = 0 -+ FBRIDGE_CBYF1SUM = 0 -+ FBRIDGE_CBYF1SUM2 = 0 -+ FBRIDGE_CBYF1MAX = -1D100 -+ FBRIDGE_CBYF1MIN = 1D100 -+#endif - c - c Read process number - c -@@ -217,9 +280,33 @@ c call sample_result(xsec,xerr) - c write(*,*) 'Final xsec: ',xsec - - rewind(lun) -- - close(lun) - -+#ifdef MG5AMC_MEEXPORTER_CUDACPP -+ CALL FBRIDGEDELETE(FBRIDGE_PBRIDGE) ! this must be at the end as it shuts down the CUDA device -+ IF( FBRIDGE_MODE .LE. -1 ) THEN ! (BothQuiet=-1 or BothDebug=-2) -+ WRITE(*,'(a,f10.8,a,e8.2)') -+ & ' [MERATIOS] ME ratio CudaCpp/Fortran: MIN = ', -+ & FBRIDGE_CBYF1MIN + 1, ' = 1 - ', -FBRIDGE_CBYF1MIN -+ WRITE(*,'(a,f10.8,a,e8.2)') -+ & ' [MERATIOS] ME ratio CudaCpp/Fortran: MAX = ', -+ & FBRIDGE_CBYF1MAX + 1, ' = 1 + ', FBRIDGE_CBYF1MAX -+ WRITE(*,'(a,i6)') -+ & ' [MERATIOS] ME ratio CudaCpp/Fortran: NENTRIES = ', -+ & FBRIDGE_NCBYF1 -+c WRITE(*,'(a,e8.2)') -+c & ' [MERATIOS] ME ratio CudaCpp/Fortran - 1: AVG = ', -+c & FBRIDGE_CBYF1SUM / FBRIDGE_NCBYF1 -+c WRITE(*,'(a,e8.2)') -+c & ' [MERATIOS] ME ratio CudaCpp/Fortran - 1: STD = ', -+c & SQRT( FBRIDGE_CBYF1SUM2 / FBRIDGE_NCBYF1 ) ! 
~standard deviation -+ WRITE(*,'(a,e8.2,a,e8.2)') -+ & ' [MERATIOS] ME ratio CudaCpp/Fortran - 1: AVG = ', -+ & FBRIDGE_CBYF1SUM / FBRIDGE_NCBYF1, ' +- ', -+ & SQRT( FBRIDGE_CBYF1SUM2 ) / FBRIDGE_NCBYF1 ! ~standard error -+ ENDIF -+#endif -+ CALL COUNTERS_FINALISE() - end - - c $B$ get_user_params $B$ ! tag for MadWeight -diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f -index bf488e4b0..707ea4032 100644 ---- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f -+++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f -@@ -71,7 +71,10 @@ C - DATA NB_FAIL /0/ - DOUBLE PRECISION GET_CHANNEL_CUT - EXTERNAL GET_CHANNEL_CUT -- -+C -+ INTEGER NGOODHEL ! -1 if not yet retrieved and printed -+ SAVE NGOODHEL -+ DATA NGOODHEL/-1/ - C - C This is just to temporarily store the reference grid for - C helicity of the DiscreteSampler so as to obtain its number of -@@ -224,6 +227,17 @@ C update. - ENDIF - IF(NTRY(1).EQ.MAXTRIES)THEN - ISHEL=MIN(ISUM_HEL,NGOOD) -+C Print the number of good helicities -+ IF (NGOODHEL.EQ.-1) THEN -+ NGOODHEL=0 -+ DO I=1,NCOMB -+ IF (GOODHEL(I,1)) THEN -+ NGOODHEL=NGOODHEL+1 -+ ENDIF -+ END DO -+ WRITE (6,*) 'NGOODHEL =', NGOODHEL -+ WRITE (6,*) 'NCOMB =', NCOMB -+ ENDIF - ENDIF - ENDIF - ELSE IF (.NOT.INIT_MODE) THEN ! 
random helicity diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common deleted file mode 100644 index ce1c49dc2a..0000000000 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +++ /dev/null @@ -1,312 +0,0 @@ -diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile -index 348c283be..49e6800ff 100644 ---- b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile -+++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile -@@ -1,6 +1,37 @@ -+SHELL := /bin/bash -+ - include ../../Source/make_opts -+ -+# Determine CUDACPP_BUILDDIR based on the user-defined choices of BACKEND, FPTYPE, HELINL, HRDCOD and USEBUILDDIR (#829) -+# Stop with an error if BACKEND=cuda and nvcc is missing or if BACKEND=hip and hipcc is missing -+include ../../src/cudacpp_config.mk -+ifeq ($(CUDACPP_BUILDDIR),) -+$(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) -+endif -+ -+# Disable all Fortran warnings? 
- FFLAGS+= -w - -+# Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html -+FFLAGS+= -cpp -+ -+# Compile counters with -O3 as in the cudacpp makefile (avoid being "unfair" to Fortran #740) -+CXXFLAGS = -O3 -Wall -Wshadow -Wextra -+ -+# Add -std=c++17 explicitly to avoid build errors on macOS -+# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" -+ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -+CXXFLAGS += -std=c++17 -mmacosx-version-min=11.3 -+endif -+ -+# Enable ccache for C++ if USECCACHE=1 (do not enable it for Fortran since it is not supported for Fortran) -+ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) -+ override CXX:=ccache $(CXX) -+endif -+###ifeq ($(USECCACHE)$(shell echo $(FC) | grep ccache),1) -+### override FC:=ccache $(FC) -+###endif -+ - # Load additional dependencies of the bias module, if present - ifeq (,$(wildcard ../bias_dependencies)) - BIASDEPENDENCIES = -@@ -24,7 +55,20 @@ else - MADLOOP_LIB = - endif - --LINKLIBS = $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L../../lib/ -ldhelas -ldsample -lmodel -lgeneric -lpdf -lgammaUPC -lcernlib $(llhapdf) -lbias -+LINKLIBS = $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L$(LIBDIR) -ldhelas -ldsample -lmodel -lgeneric -lpdf -lcernlib $(llhapdf) -lbias -+ -+CUDACPP_MAKEFILE=cudacpp.mk -+processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') -+ifeq ($(BACKEND),cuda) -+CUDACPP_COMMONLIB=mg5amc_common_cuda -+CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cuda -+else ifeq ($(BACKEND),hip) -+CUDACPP_COMMONLIB=mg5amc_common_hip -+CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_hip -+else -+CUDACPP_COMMONLIB=mg5amc_common_cpp -+CUDACPP_BACKENDLIB=mg5amc_$(processid_short)_cpp -+endif - - LIBS = $(LIBDIR)libbias.$(libext) $(LIBDIR)libdhelas.$(libext) $(LIBDIR)libdsample.$(libext) $(LIBDIR)libgeneric.$(libext) $(LIBDIR)libpdf.$(libext) $(LIBDIR)libgammaUPC.$(libext) 
$(LIBDIR)libmodel.$(libext) $(LIBDIR)libcernlib.$(libext) $(MADLOOP_LIB) $(LOOP_LIBS) - -@@ -43,41 +87,148 @@ ifeq ($(strip $(MATRIX_HEL)),) - endif - - --PROCESS= driver.o myamp.o genps.o unwgt.o setcuts.o get_color.o \ -+PROCESS= myamp.o genps.o unwgt.o setcuts.o get_color.o \ - cuts.o cluster.o reweight.o initcluster.o addmothers.o setscales.o \ -- idenparts.o dummy_fct.o \ -- $(patsubst %.f,%.o,$(wildcard auto_dsig*.f)) \ -+ idenparts.o dummy_fct.o -+ -+DSIG=driver.o $(patsubst %.f, %.o, $(filter-out auto_dsig.f, $(wildcard auto_dsig*.f))) -+DSIG_cudacpp=driver_cudacpp.o $(patsubst %.f, %_cudacpp.o, $(filter-out auto_dsig.f, $(wildcard auto_dsig*.f))) - - SYMMETRY = symmetry.o idenparts.o - - # Binaries - --$(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) -- $(FC) -o $(PROG) $(PROCESS) $(MATRIX) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -+ifeq ($(UNAME),Darwin) -+LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) -+LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" -+else -+LDFLAGS += -Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (not supported on macOS) -+endif - --$(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) -- $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -+# Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) -+.DEFAULT_GOAL := all - --gensym: $(SYMMETRY) configs.inc $(LIBS) -- $(FC) -o gensym $(SYMMETRY) -L../../lib/ $(LINKLIBS) $(LDFLAGS) -+ifeq ($(BACKEND),cuda) -+all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cuda -+else ifeq ($(BACKEND),hip) -+all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_hip -+else -+all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp -+endif - --$(LIBDIR)libmodel.$(libext): ../../Cards/param_card.dat -- 
cd ../../Source/MODEL; make -+# Disable OpenMP by default: enable OpenMP only if USEOPENMP=1 (#758) -+ifeq ($(USEOPENMP),1) -+ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -+override OMPFLAGS = -fopenmp -+LINKLIBS += -liomp5 # see #578 -+LINKLIBS += -lintlc # undefined reference to `_intel_fast_memcpy' -+else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -+override OMPFLAGS = -fopenmp -+$(CUDACPP_BUILDDIR)/$(PROG)_cpp: LINKLIBS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 -+else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -+override OMPFLAGS = # OMP is not supported yet by cudacpp for Apple clang -+else -+override OMPFLAGS = -fopenmp -+endif -+endif -+ -+$(PROG)_fortran: $(PROCESS) $(DSIG) auto_dsig.o $(LIBS) $(MATRIX) counters.o ompnumthreads.o -+ $(FC) -o $(PROG)_fortran $(PROCESS) $(DSIG) auto_dsig.o $(MATRIX) $(LINKLIBS) $(BIASDEPENDENCIES) $(OMPFLAGS) counters.o ompnumthreads.o $(LDFLAGS) - --$(LIBDIR)libgeneric.$(libext): ../../Cards/run_card.dat -+$(LIBS): .libs -+ -+.libs: ../../Cards/param_card.dat ../../Cards/run_card.dat - cd ../../Source; make -+ touch $@ -+ -+$(CUDACPP_BUILDDIR)/.cudacpplibs: -+ $(MAKE) -f $(CUDACPP_MAKEFILE) -+ touch $@ -+ -+# On Linux, set rpath to LIBDIR to make it unnecessary to use LD_LIBRARY_PATH -+# Use relative paths with respect to the executables ($ORIGIN on Linux) -+# On Darwin, building libraries with absolute paths in LIBDIR makes this unnecessary -+ifeq ($(UNAME_S),Darwin) -+ override LIBFLAGSRPATH = -+else ifeq ($(USEBUILDDIR),1) -+ override LIBFLAGSRPATH = -Wl,-rpath,'$$ORIGIN/../$(LIBDIR)/$(CUDACPP_BUILDDIR)' -+else -+ override LIBFLAGSRPATH = -Wl,-rpath,'$$ORIGIN/$(LIBDIR)' -+endif -+ -+.PHONY: madevent_fortran_link madevent_cuda_link madevent_hip_link madevent_cpp_link -+ -+madevent_fortran_link: $(PROG)_fortran -+ rm -f $(PROG) -+ ln -s $(PROG)_fortran $(PROG) -+ -+madevent_cuda_link: -+ $(MAKE) USEGTEST=0 BACKEND=cuda $(CUDACPP_BUILDDIR)/$(PROG)_cuda 
-+ rm -f $(PROG) -+ ln -s $(CUDACPP_BUILDDIR)/$(PROG)_cuda $(PROG) -+ -+madevent_hip_link: -+ $(MAKE) USEGTEST=0 BACKEND=hip $(CUDACPP_BUILDDIR)/$(PROG)_hip -+ rm -f $(PROG) -+ ln -s $(CUDACPP_BUILDDIR)/$(PROG)_hip $(PROG) -+ -+madevent_cpp_link: -+ $(MAKE) USEGTEST=0 BACKEND=cppauto $(CUDACPP_BUILDDIR)/$(PROG)_cpp -+ rm -f $(PROG) -+ ln -s $(CUDACPP_BUILDDIR)/$(PROG)_cpp $(PROG) - --$(LIBDIR)libpdf.$(libext): -- cd ../../Source/PDF; make -+override SUPPORTED_AVXS = cppnone cppsse4 cppavx2 cpp512y cpp512z cppauto -+madevent_%_link: -+ @if [ '$(words $(filter $*, $(SUPPORTED_AVXS)))' != '1' ]; then echo "ERROR! Invalid target '$@' (supported madevent_cpp*_link targets are: $(foreach avx,$(SUPPORTED_AVXS),'madevent_cpp$(avx)_link'))"; exit 1; fi -+ $(MAKE) USEGTEST=0 BACKEND=$* $(CUDACPP_BUILDDIR)/$(PROG)_cpp -+ rm -f $(PROG) -+ ln -s $(CUDACPP_BUILDDIR)/$(PROG)_cpp $(PROG) - --$(LIBDIR)libgammaUPC.$(libext): -- cd ../../Source/PDF/gammaUPC; make -+# Building $(PROG)_cpp no longer builds $(PROG)_cuda if CUDACPP_BACKENDLIB for cuda exists (this was the case in the past to allow cpp-only builds #503) -+$(CUDACPP_BUILDDIR)/$(PROG)_cpp: $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(LIBS) $(MATRIX) counters.o ompnumthreads.o $(CUDACPP_BUILDDIR)/.cudacpplibs -+ $(FC) -o $@ $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(MATRIX) $(LINKLIBS) $(BIASDEPENDENCIES) $(OMPFLAGS) counters.o ompnumthreads.o -L$(LIBDIR)/$(CUDACPP_BUILDDIR) -l$(CUDACPP_COMMONLIB) -l$(CUDACPP_BACKENDLIB) $(LIBFLAGSRPATH) $(LDFLAGS) -+ -+# Building $(PROG)_cuda now uses its own rule -+$(CUDACPP_BUILDDIR)/$(PROG)_cuda: $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(LIBS) $(MATRIX) counters.o ompnumthreads.o $(CUDACPP_BUILDDIR)/.cudacpplibs -+ $(FC) -o $@ $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(MATRIX) $(LINKLIBS) $(BIASDEPENDENCIES) $(OMPFLAGS) counters.o ompnumthreads.o -L$(LIBDIR)/$(CUDACPP_BUILDDIR) -l$(CUDACPP_COMMONLIB) -l$(CUDACPP_BACKENDLIB) $(LIBFLAGSRPATH) $(LDFLAGS) -+ -+# Building $(PROG)_hip also uses its own 
rule -+$(CUDACPP_BUILDDIR)/$(PROG)_hip: $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(LIBS) $(MATRIX) counters.o ompnumthreads.o $(CUDACPP_BUILDDIR)/.cudacpplibs -+ $(FC) -o $@ $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(MATRIX) $(LINKLIBS) $(BIASDEPENDENCIES) $(OMPFLAGS) counters.o ompnumthreads.o -L$(LIBDIR)/$(CUDACPP_BUILDDIR) -l$(CUDACPP_COMMONLIB) -l$(CUDACPP_BACKENDLIB) $(LIBFLAGSRPATH) $(LDFLAGS) -+ -+counters.o: counters.cc timer.h -+ $(CXX) $(CXXFLAGS) -c $< -o $@ -+ -+ompnumthreads.o: ompnumthreads.cc ompnumthreads.h -+ $(CXX) -I. $(CXXFLAGS) $(OMPFLAGS) -c $< -o $@ -+ -+$(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) -+ $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) $(OMPFLAGS) -+ -+gensym: $(SYMMETRY) configs.inc $(LIBS) -+ $(FC) -o gensym $(SYMMETRY) -L$(LIBDIR) $(LINKLIBS) $(LDFLAGS) -+ -+###ifeq (,$(wildcard fbridge.inc)) # Pointless: fbridge.inc always exists as this is the cudacpp-modified makefile! -+###$(LIBDIR)libmodel.$(libext): ../../Cards/param_card.dat -+### cd ../../Source/MODEL; make -+### -+###$(LIBDIR)libgeneric.$(libext): ../../Cards/run_card.dat -+### cd ../../Source; make -+### -+###$(LIBDIR)libpdf.$(libext): -+### cd ../../Source/PDF; make -+### -+###$(LIBDIR)libgammaUPC.$(libext): -+### cd ../../Source/PDF/gammaUPC; make -+###endif - - # Add source so that the compiler finds the DiscreteSampler module. 
- $(MATRIX): %.o: %.f - $(FC) $(FFLAGS) $(MATRIX_FLAG) -c $< -I../../Source/ -I../../Source/PDF/gammaUPC - %.o: %.f - $(FC) $(FFLAGS) -c $< -I../../Source/ -I../../Source/PDF/gammaUPC -+%_cudacpp.o: %.f -+ $(FC) $(FFLAGS) -c -DMG5AMC_MEEXPORTER_CUDACPP $< -I../../Source/ $(OMPFLAGS) -o $@ - - # Dependencies - -@@ -97,5 +248,80 @@ unwgt.o: genps.inc nexternal.inc symswap.inc cluster.inc run.inc message.inc \ - run_config.inc - initcluster.o: message.inc - --clean: -- $(RM) *.o gensym madevent madevent_forhel -+# Extra dependencies on discretesampler.mod -+ -+auto_dsig.o: .libs -+driver.o: .libs -+driver_cudacpp.o: .libs -+$(MATRIX): .libs -+genps.o: .libs -+ -+# Cudacpp bldall targets -+ -+ifeq ($(UNAME_P),ppc64le) -+bldavxs: bldnone bldsse4 -+else ifeq ($(UNAME_P),arm) -+bldavxs: bldnone bldsse4 -+else -+bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z -+endif -+ -+ifneq ($(shell which hipcc 2>/dev/null),) -+ifneq ($(shell which nvcc 2>/dev/null),) -+bldall: bldhip bldcuda bldavxs -+else -+bldall: bldhip bldavxs -+endif -+else -+ifneq ($(shell which nvcc 2>/dev/null),) -+bldall: bldcuda bldavxs -+else -+bldall: bldavxs -+endif -+endif -+ -+bldcuda: $(PROG)_fortran $(DSIG_cudacpp) -+ @echo -+ $(MAKE) USEBUILDDIR=1 BACKEND=cuda -+ -+bldhip: $(PROG)_fortran $(DSIG_cudacpp) -+ @echo -+ $(MAKE) USEBUILDDIR=1 BACKEND=hip -+ -+bldnone: $(PROG)_fortran $(DSIG_cudacpp) -+ @echo -+ $(MAKE) USEBUILDDIR=1 BACKEND=cppnone -+ -+bldsse4: $(PROG)_fortran $(DSIG_cudacpp) -+ @echo -+ $(MAKE) USEBUILDDIR=1 BACKEND=cppsse4 -+ -+bldavx2: $(PROG)_fortran $(DSIG_cudacpp) -+ @echo -+ $(MAKE) USEBUILDDIR=1 BACKEND=cppavx2 -+ -+bld512y: $(PROG)_fortran $(DSIG_cudacpp) -+ @echo -+ $(MAKE) USEBUILDDIR=1 BACKEND=cpp512y -+ -+bld512z: $(PROG)_fortran $(DSIG_cudacpp) -+ @echo -+ $(MAKE) USEBUILDDIR=1 BACKEND=cpp512z -+ -+# Clean (NB: 'make clean' in Source calls 'make clean' in all P*) -+ -+clean: # Clean builds: fortran in this Pn; cudacpp executables for one AVX in this Pn -+ $(RM) *.o gensym 
$(PROG) $(PROG)_fortran $(PROG)_forhel $(CUDACPP_BUILDDIR)/$(PROG)_cpp $(CUDACPP_BUILDDIR)/$(PROG)_cuda $(CUDACPP_BUILDDIR)/$(PROG)_hip -+ -+cleanavxs: clean # Clean builds: fortran in this Pn; cudacpp for all AVX in this Pn and in src -+ $(MAKE) -f $(CUDACPP_MAKEFILE) cleanall -+ rm -f $(CUDACPP_BUILDDIR)/.cudacpplibs -+ rm -f .libs -+ -+cleanall: # Clean builds: fortran in all P* and in Source; cudacpp for all AVX in all P* and in src -+ make -C ../../Source cleanall -+ rm -rf $(LIBDIR)libbias.$(libext) -+ rm -f ../../Source/*.mod ../../Source/*/*.mod -+ -+distclean: cleanall # Clean all fortran and cudacpp builds as well as the googletest installation -+ $(MAKE) -f $(CUDACPP_MAKEFILE) distclean diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py index b1739da73d..421fb0f97e 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py @@ -40,7 +40,9 @@ def compile(self, *args, **opts): cudacpp_backend = self.run_card['cudacpp_backend'].lower() # the default value is defined in launch_plugin.py logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) if cudacpp_backend in cudacpp_supported_backends : - args[0][0] = 'madevent_' + cudacpp_backend + '_link' + new_args = list(args) + new_args[0] = ['-f', 'makefile', '-f', 'cudacpp_overlay.mk', 'madevent_' + cudacpp_backend + '_link'] + new_args[0][1:] + args = tuple(new_args) else: raise Exception( "Invalid cudacpp_backend='%s': supported backends are %s"%supported_backends ) return misc.compile(nb_core=self.options['nb_core'], *args, **opts) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_overlay.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_overlay.mk new file mode 100644 index 
0000000000..0af10482d7 --- /dev/null +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_overlay.mk @@ -0,0 +1,297 @@ +# cudacpp_overlay.mk +# To be used after the project makefile +# Usage: make -f makefile -f cudacpp_overlay.mk ... +SHELL := /bin/bash + +# Recursive-make helper +PRIMARY_MK ?= makefile +OVERLAY_MK ?= cudacpp_overlay.mk +SELF_MF := -f $(PRIMARY_MK) -f $(OVERLAY_MK) + +# Determine CUDACPP_BUILDDIR based on the user-defined choices of BACKEND, FPTYPE, HELINL, HRDCOD and USEBUILDDIR (#829) +# Stop with an error if BACKEND=cuda and nvcc is missing or if BACKEND=hip and hipcc is missing +include ../../src/cudacpp_config.mk +ifeq ($(CUDACPP_BUILDDIR),) + $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) +endif + +# Basic uname helpers (if not already set) +UNAME_S ?= $(shell uname -s) +UNAME_P ?= $(shell uname -p) + +# Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html +FFLAGS+= -cpp + +# Compile counters with -O3 as in the cudacpp makefile (avoid being "unfair" to Fortran #740) +CXXFLAGS = -O3 -Wall -Wshadow -Wextra + +# Add -std=c++17 explicitly to avoid build errors on macOS +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) + CXXFLAGS += -std=c++17 -mmacosx-version-min=11.3 +endif + +# Enable ccache for C++ if USECCACHE=1 (do not enable it for Fortran since it is not supported for Fortran) +ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) + override CXX := ccache $(CXX) +endif + +# ---------------------------------------------------------------------- +# Backend library names and process id +# ---------------------------------------------------------------------- +CUDACPP_MAKEFILE := cudacpp.mk +processid_short := $(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') + +ifeq ($(BACKEND),cuda) + 
CUDACPP_COMMONLIB := mg5amc_common_cuda + CUDACPP_BACKENDLIB := mg5amc_$(processid_short)_cuda +else ifeq ($(BACKEND),hip) + CUDACPP_COMMONLIB := mg5amc_common_hip + CUDACPP_BACKENDLIB := mg5amc_$(processid_short)_hip +else + CUDACPP_COMMONLIB := mg5amc_common_cpp + CUDACPP_BACKENDLIB := mg5amc_$(processid_short)_cpp +endif + +# ---------------------------------------------------------------------- +# Libraries and link line adjustments +# ---------------------------------------------------------------------- +# Prefer LIBDIR everywhere; base makefile already defines LIBDIR. +LINKLIBS := $(LINK_MADLOOP_LIB) $(LINK_LOOP_LIBS) -L$(LIBDIR) \ + -ldhelas -ldsample -lmodel -lgeneric -lpdf -lcernlib $(llhapdf) -lbias + +# OpenMP: enable only if requested, USEOPENMP=1 (#758) +ifeq ($(USEOPENMP),1) + ifneq ($(shell $(CXX) --version | egrep '^Intel'),) + override OMPFLAGS = -fopenmp + LINKLIBS += -liomp5 # see #578 + LINKLIBS += -lintlc # undefined reference to '_intel_fast_memcpy' + else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override OMPFLAGS = -fopenmp + # For the *cpp* binary with clang, ensure libomp is found + $(CUDACPP_BUILDDIR)/$(PROG)_cpp: LINKLIBS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 + else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) + override OMPFLAGS = # OMP is not supported yet by cudacpp for Apple clang + else + override OMPFLAGS = -fopenmp + endif +endif + +# ---------------------------------------------------------------------- +# Objects & targets +# ---------------------------------------------------------------------- +# Keep driver* separate from PROCESS; we form DSIG groups below.
+PROCESS := myamp.o genps.o unwgt.o setcuts.o get_color.o \ + cuts.o cluster.o reweight.o initcluster.o addmothers.o setscales.o \ + idenparts.o dummy_fct.o + +DSIG := driver.o $(patsubst %.f, %.o, $(filter-out auto_dsig.f, $(wildcard auto_dsig*.f))) +DSIG_cudacpp := driver_cudacpp.o $(patsubst %.f, %_cudacpp.o, $(filter-out auto_dsig.f, $(wildcard auto_dsig*.f))) + +SYMMETRY := symmetry.o idenparts.o + +# Binaries + +ifeq ($(UNAME_S),Darwin) + LDFLAGS += -lc++ -mmacosx-version-min=11.3 +else + LDFLAGS += -Wl,--no-relax +endif + +# Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) +.DEFAULT_GOAL := all +ifeq ($(BACKEND),cuda) + all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cuda +else ifeq ($(BACKEND),hip) + all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_hip +else + all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp +endif + +# Library build stamps +$(LIBS): .libs + +.libs: ../../Cards/param_card.dat ../../Cards/run_card.dat + $(MAKE) -C ../../Source + touch $@ + +$(CUDACPP_BUILDDIR)/.cudacpplibs: + $(MAKE) -f $(CUDACPP_MAKEFILE) + touch $@ + +# Remove per-library recipes from makefile to avoid duplicate sub-makes +# under ../../Source running in parallel otherwise we can have race condition +# Build the libs only via the single .libs stamp. + +# Ensure these targets are satisfied by building Source once +$(LIBDIR)libmodel.$(libext) : | .libs +$(LIBDIR)libgeneric.$(libext) : | .libs +$(LIBDIR)libpdf.$(libext) : | .libs +$(LIBDIR)libgammaUPC.$(libext) : | .libs + +# Override the recipes from makefile_orig with empty recipes +# (GNU Make will use the last recipe it reads.)
+$(LIBDIR)libmodel.$(libext) : ; @: +$(LIBDIR)libgeneric.$(libext) : ; @: +$(LIBDIR)libpdf.$(libext) : ; @: +$(LIBDIR)libgammaUPC.$(libext) : ; @: + +# On Linux, set rpath to LIBDIR to make it unnecessary to use LD_LIBRARY_PATH +# Use relative paths with respect to the executables ($ORIGIN on Linux) +# On Darwin, building libraries with absolute paths in LIBDIR makes this unnecessary +ifeq ($(UNAME_S),Darwin) + override LIBFLAGSRPATH := +else ifeq ($(USEBUILDDIR),1) + override LIBFLAGSRPATH := -Wl,-rpath,'$$ORIGIN/../$(LIBDIR)/$(CUDACPP_BUILDDIR)' +else + override LIBFLAGSRPATH := -Wl,-rpath,'$$ORIGIN/$(LIBDIR)' +endif + +# Final link steps +$(PROG)_fortran: $(PROCESS) $(DSIG) auto_dsig.o $(LIBS) $(MATRIX) counters.o ompnumthreads.o + $(FC) -o $@ $(PROCESS) $(DSIG) auto_dsig.o $(MATRIX) $(LINKLIBS) $(BIASDEPENDENCIES) $(OMPFLAGS) counters.o ompnumthreads.o $(LDFLAGS) + +# Building $(PROG)_cpp no longer builds $(PROG)_cuda if CUDACPP_BACKENDLIB for cuda exists (this was the case in the past to allow cpp-only builds #503) +$(CUDACPP_BUILDDIR)/$(PROG)_cpp: $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(LIBS) $(MATRIX) counters.o ompnumthreads.o $(CUDACPP_BUILDDIR)/.cudacpplibs + $(FC) -o $@ $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(MATRIX) $(LINKLIBS) $(BIASDEPENDENCIES) $(OMPFLAGS) counters.o ompnumthreads.o -L$(LIBDIR)/$(CUDACPP_BUILDDIR) -l$(CUDACPP_COMMONLIB) -l$(CUDACPP_BACKENDLIB) $(LIBFLAGSRPATH) $(LDFLAGS) + +# Building $(PROG)_cuda now uses its own rule +$(CUDACPP_BUILDDIR)/$(PROG)_cuda: $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(LIBS) $(MATRIX) counters.o ompnumthreads.o $(CUDACPP_BUILDDIR)/.cudacpplibs + $(FC) -o $@ $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(MATRIX) $(LINKLIBS) $(BIASDEPENDENCIES) $(OMPFLAGS) counters.o ompnumthreads.o -L$(LIBDIR)/$(CUDACPP_BUILDDIR) -l$(CUDACPP_COMMONLIB) -l$(CUDACPP_BACKENDLIB) $(LIBFLAGSRPATH) $(LDFLAGS) + +# Building $(PROG)_hip also uses its own rule +$(CUDACPP_BUILDDIR)/$(PROG)_hip: $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(LIBS) 
$(MATRIX) counters.o ompnumthreads.o $(CUDACPP_BUILDDIR)/.cudacpplibs + $(FC) -o $@ $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(MATRIX) $(LINKLIBS) $(BIASDEPENDENCIES) $(OMPFLAGS) counters.o ompnumthreads.o -L$(LIBDIR)/$(CUDACPP_BUILDDIR) -l$(CUDACPP_COMMONLIB) -l$(CUDACPP_BACKENDLIB) $(LIBFLAGSRPATH) $(LDFLAGS) + +# Helpers compiled with C++ +counters.o: counters.cc timer.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +ompnumthreads.o: ompnumthreads.cc ompnumthreads.h + $(CXX) -I. $(CXXFLAGS) $(OMPFLAGS) -c $< -o $@ + +# Alternate binaries (kept for parity) +$(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) + $(FC) -o $@ $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) $(OMPFLAGS) + +gensym: $(SYMMETRY) configs.inc $(LIBS) + $(FC) -o $@ $(SYMMETRY) -L$(LIBDIR) $(LINKLIBS) $(LDFLAGS) + +# Compile rules (override base ones) +$(MATRIX): %.o: %.f + $(FC) $(FFLAGS) $(MATRIX_FLAG) -c $< -I../../Source/ -I../../Source/PDF/gammaUPC + +%.o: %.f + $(FC) $(FFLAGS) -c $< -I../../Source/ -I../../Source/PDF/gammaUPC + +%_cudacpp.o: %.f + $(FC) $(FFLAGS) -c -DMG5AMC_MEEXPORTER_CUDACPP $< -I../../Source/ $(OMPFLAGS) -o $@ + +# Extra dependencies on discretesampler.mod +auto_dsig.o: .libs +driver.o: .libs +driver_cudacpp.o: .libs +$(MATRIX): .libs +genps.o: .libs + +# Convenience link targets to switch $(PROG) symlink +.PHONY: madevent_fortran_link madevent_cuda_link madevent_hip_link madevent_cpp_link +madevent_fortran_link: $(PROG)_fortran + rm -f $(PROG) + ln -s $(PROG)_fortran $(PROG) + +madevent_cuda_link: + $(MAKE) $(SELF_MF) USEGTEST=0 BACKEND=cuda $(CUDACPP_BUILDDIR)/$(PROG)_cuda + rm -f $(PROG) + ln -s $(CUDACPP_BUILDDIR)/$(PROG)_cuda $(PROG) + +madevent_hip_link: + $(MAKE) $(SELF_MF) USEGTEST=0 BACKEND=hip $(CUDACPP_BUILDDIR)/$(PROG)_hip + rm -f $(PROG) + ln -s $(CUDACPP_BUILDDIR)/$(PROG)_hip $(PROG) + +madevent_cpp_link: + $(MAKE) $(SELF_MF) USEGTEST=0 BACKEND=cppauto $(CUDACPP_BUILDDIR)/$(PROG)_cpp + rm -f $(PROG) + ln -s 
$(CUDACPP_BUILDDIR)/$(PROG)_cpp $(PROG) + +# Variant AVX builds for cpp backend +override SUPPORTED_AVXS := cppnone cppsse4 cppavx2 cpp512y cpp512z cppauto +madevent_%_link: + @if [ '$(words $(filter $*, $(SUPPORTED_AVXS)))' != '1' ]; then \ + echo "ERROR! Invalid target '$@' (supported: $(foreach avx,$(SUPPORTED_AVXS),madevent_$(avx)_link))"; exit 1; fi + $(MAKE) $(SELF_MF) USEGTEST=0 BACKEND=$* $(CUDACPP_BUILDDIR)/$(PROG)_cpp + rm -f $(PROG) + ln -s $(CUDACPP_BUILDDIR)/$(PROG)_cpp $(PROG) + +# Cudacpp bldall targets +ifeq ($(UNAME_P),ppc64le) + bldavxs: bldnone bldsse4 +else ifeq ($(UNAME_P),arm) + bldavxs: bldnone bldsse4 +else + bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z +endif + +ifneq ($(shell which hipcc 2>/dev/null),) + ifneq ($(shell which nvcc 2>/dev/null),) + bldall: bldhip bldcuda bldavxs + else + bldall: bldhip bldavxs + endif +else + ifneq ($(shell which nvcc 2>/dev/null),) + bldall: bldcuda bldavxs + else + bldall: bldavxs + endif +endif + +bldcuda: $(PROG)_fortran $(DSIG_cudacpp) + @echo + $(MAKE) $(SELF_MF) USEBUILDDIR=1 BACKEND=cuda + +bldhip: $(PROG)_fortran $(DSIG_cudacpp) + @echo + $(MAKE) $(SELF_MF) USEBUILDDIR=1 BACKEND=hip + +bldnone: $(PROG)_fortran $(DSIG_cudacpp) + @echo + $(MAKE) $(SELF_MF) USEBUILDDIR=1 BACKEND=cppnone + +bldsse4: $(PROG)_fortran $(DSIG_cudacpp) + @echo + $(MAKE) $(SELF_MF) USEBUILDDIR=1 BACKEND=cppsse4 + +bldavx2: $(PROG)_fortran $(DSIG_cudacpp) + @echo + $(MAKE) $(SELF_MF) USEBUILDDIR=1 BACKEND=cppavx2 + +bld512y: $(PROG)_fortran $(DSIG_cudacpp) + @echo + $(MAKE) $(SELF_MF) USEBUILDDIR=1 BACKEND=cpp512y + +bld512z: $(PROG)_fortran $(DSIG_cudacpp) + @echo + $(MAKE) $(SELF_MF) USEBUILDDIR=1 BACKEND=cpp512z + +# Clean (NB: 'make clean' in Source calls 'make clean' in all P*) +clean: # Clean builds: fortran in this Pn; cudacpp executables for one AVX in this Pn + $(RM) *.o gensym $(PROG) $(PROG)_fortran $(PROG)_forhel \ + $(CUDACPP_BUILDDIR)/$(PROG)_cpp \ + $(CUDACPP_BUILDDIR)/$(PROG)_cuda \ + 
$(CUDACPP_BUILDDIR)/$(PROG)_hip + +cleanavxs: clean # Clean builds: fortran in this Pn; cudacpp for all AVX in this Pn and in src + $(MAKE) -f $(CUDACPP_MAKEFILE) cleanall + rm -f $(CUDACPP_BUILDDIR)/.cudacpplibs + rm -f .libs + +cleanall: # Clean builds: fortran in all P* and in Source; cudacpp for all AVX in all P* and in src + $(MAKE) -C ../../Source cleanall + rm -rf $(LIBDIR)libbias.$(libext) + rm -f ../../Source/*.mod ../../Source/*/*.mod + +distclean: cleanall # Clean all fortran and cudacpp builds as well as the googletest installation + $(MAKE) -f $(CUDACPP_MAKEFILE) distclean + diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index 209f088314..839ab5c62e 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -121,6 +121,7 @@ class PLUGIN_ProcessExporter(PLUGIN_export_cpp.ProcessExporterGPU): s+'gpu/MadgraphTest.h', s+'gpu/runTest.cc', s+'gpu/testmisc.cc', s+'gpu/testxxx_cc_ref.txt', s+'gpu/valgrind.h', s+'gpu/perf.py', s+'gpu/profile.sh', + s+'gpu/cudacpp_overlay.mk', s+'CMake/SubProcesses/CMakeLists.txt'], 'test': [s+'gpu/cudacpp_test.mk']} @@ -144,6 +145,7 @@ class PLUGIN_ProcessExporter(PLUGIN_export_cpp.ProcessExporterGPU): 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', # this is generated from a template in Subprocesses but we still link it in P1 + 'cudacpp_overlay.mk', # this is generated from a template in Subprocesses but we still link it in P1 'testxxx.cc', # this is generated from a template in Subprocesses but we still link it in P1 'MemoryBuffers.h', # this is generated from a template in Subprocesses but we still link it in P1 'MemoryAccessCouplings.h', # this is generated from a template in Subprocesses but we still link it in P1 @@ -237,8 +239,8 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): 
outputflags is a list of options provided when doing the output command""" ###misc.sprint('Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self)) if self.in_madevent_mode: - if 'CUDACPP_CODEGEN_PATCHLEVEL' in os.environ: patchlevel = os.environ['CUDACPP_CODEGEN_PATCHLEVEL'] - else: patchlevel = '' + # if 'CUDACPP_CODEGEN_PATCHLEVEL' in os.environ: patchlevel = os.environ['CUDACPP_CODEGEN_PATCHLEVEL'] + # else: patchlevel = '' # OLDEST implementation (AV) #path = os.path.realpath(os.curdir + os.sep + 'PLUGIN' + os.sep + 'CUDACPP_OUTPUT') #misc.sprint(path) @@ -255,17 +257,46 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): # **NB** AV: change the Popen call to always dump stdout and stderr, because I want to always see the output # **NB** AV: this also allows error checking by looking for error strings on the generation log if patchMad.sh silently fails # **NB** AV: (e.g. this did happen in the past, when patchMad.sh was calling 'madevent treatcards run', and the latter silently failed) - plugin_path = os.path.dirname(os.path.realpath( __file__ )) + # plugin_path = os.path.dirname(os.path.realpath( __file__ )) ###p = subprocess.Popen([pjoin(plugin_path, 'patchMad.sh'), self.dir_path , 'PROD', str(patchlevel)], ### stdout=subprocess.PIPE, stderr=subprocess.PIPE) - p = subprocess.Popen([pjoin(plugin_path, 'patchMad.sh'), self.dir_path , 'PROD', str(patchlevel)]) # AV always dump patchMad.sh stdout/stderr - stdout, stderr = p.communicate() - misc.sprint(p.returncode) - if p.returncode != 0: # AV: WARNING! do not fully trust this check! patchMad.sh was observed to silently fail in the past... - logger.debug("####### \n stdout is \n %s", stdout) - logger.info("####### \n stderr is \n %s", stderr) - logger.info("return code is %s\n", p.returncode) - raise Exception('ERROR! 
the O/S call to patchMad.sh failed') + # p = subprocess.Popen([pjoin(plugin_path, 'patchMad.sh'), self.dir_path , 'PROD', str(patchlevel)]) # AV always dump patchMad.sh stdout/stderr + # stdout, stderr = p.communicate() + # misc.sprint(p.returncode) + # if p.returncode != 0: # AV: WARNING! do not fully trust this check! patchMad.sh was observed to silently fail in the past... + # logger.debug("####### \n stdout is \n %s", stdout) + # logger.info("####### \n stderr is \n %s", stderr) + # logger.info("return code is %s\n", p.returncode) + # raise Exception('ERROR! the O/S call to patchMad.sh failed') + + patch_coupl_write = r"""set -euo pipefail +# Get last fields from lines starting with WRITE(*,2) +gcs=$(awk '$1=="WRITE(*,2)" {print $NF}' coupl_write.inc) + +for gc in $gcs; do + if grep -q "$gc(VECSIZE_MEMMAX)" coupl.inc; then + awk -v gc="$gc" '{ + if ($1=="WRITE(*,2)" && $NF==gc) print $0"(1)"; + else print + }' coupl_write.inc > coupl_write.inc.new + mv coupl_write.inc.new coupl_write.inc + fi +done""" + try: + result = subprocess.run( + ["bash", "-lc", patch_coupl_write], + cwd=pjoin(self.dir_path, "Source", "MODEL"), + text=True, + capture_output=True, + check=True, # raise CalledProcessError on non-zero exit + ) + misc.sprint(result.returncode) + except subprocess.CalledProcessError as e: + logger.debug("####### \n stdout is \n %s", e.stdout) + logger.info("####### \n stderr is \n %s", e.stderr) + logger.info("return code is %s\n", e.returncode) + raise Exception("ERROR while patching coupl_write.inc") from e + # Additional patching (OM) self.add_madevent_plugin_fct() # Added by OM # do not call standard finalize since is this is already done... 
@@ -332,6 +363,7 @@ def change_output_args(args, cmd): return args class FortranExporterBridge(export_v4.ProcessExporterFortranMEGroup): + _file_path = export_v4._file_path def write_auto_dsig_file(self, writer, matrix_element, proc_id = ""): replace_dict,context = super().write_auto_dsig_file(False, matrix_element, proc_id) @@ -370,15 +402,121 @@ def write_auto_dsig_file(self, writer, matrix_element, proc_id = ""): #endif CALL COUNTERS_SMATRIX1MULTI_START( -1, VECSIZE_USED ) ! fortranMEs=-1""" replace_dict["OMP_POSTFIX"] = open(pjoin(PLUGINDIR,'madgraph','iolibs','template_files','gpu','smatrix_multi.f')).read().split('\n',4)[4] # AV skip 4 copyright lines - _file_path = export_v4._file_path if writer: - file = open(pjoin(_file_path, 'iolibs/template_files/auto_dsig_v4.inc')).read() + file = open(pjoin(self._file_path, 'iolibs/template_files/auto_dsig_v4.inc')).read() file = file % replace_dict # Write the file writer.writelines(file, context=context) else: return replace_dict, context + def write_driver(self, writer, *args, **kwargs): + """Write the SubProcess/driver.f file with additions from CUDACPP""" + replace_dict = super().write_driver(False, *args, **kwargs) + + # Additions from CUDACPP plugin (after patch) + replace_dict['CUDACPP_EXTRA_HEADER'] = """ + character*255 env_name, env_value + integer env_length, env_status + +#ifdef MG5AMC_MEEXPORTER_CUDACPP + INCLUDE 'fbridge.inc' +c INCLUDE 'fbridge_common.inc' +#endif + INCLUDE 'fbridge_common.inc' +""" + + replace_dict['CUDACPP_EXTRA_INITIALISE'] = """ +#ifdef _OPENMP + CALL OMPNUMTHREADS_NOT_SET_MEANS_ONE_THREAD() +#endif + CALL COUNTERS_INITIALISE() + +#ifdef MG5AMC_MEEXPORTER_CUDACPP + fbridge_mode = 1 ! CppOnly=1, default for CUDACPP +#else + fbridge_mode = 0 ! 
FortranOnly=0, default for FORTRAN +#endif + env_name = 'CUDACPP_RUNTIME_FBRIDGEMODE' + call get_environment_variable(env_name, env_value, env_length, env_status) + if( env_status.eq.0 ) then + write(*,*) 'Found environment variable "', trim(env_name), '" with value "', trim(env_value), '"' + read(env_value,'(I255)') FBRIDGE_MODE ! see https://gcc.gnu.org/onlinedocs/gfortran/ICHAR.html + write(*,*) 'FBRIDGE_MODE (from env) = ', FBRIDGE_MODE + else if( env_status.eq.1 ) then ! 1 = not defined + write(*,*) 'FBRIDGE_MODE (default) = ', FBRIDGE_MODE + else ! -1 = too long for env_value, 2 = not supported by O/S + write(*,*) 'ERROR! get_environment_variable failed for "', trim(env_name), '"' + STOP + endif +#ifndef MG5AMC_MEEXPORTER_CUDACPP + if( fbridge_mode.ne.0 ) then + write(*,*) 'ERROR! Invalid fbridge_mode (in FORTRAN backend mode) = ', fbridge_mode + STOP + endif +#endif + + vecsize_used = vecsize_memmax ! default ! CppOnly=1, default for CUDACPP + env_name = 'CUDACPP_RUNTIME_VECSIZEUSED' + call get_environment_variable(env_name, env_value, env_length, env_status) + if( env_status.eq.0 ) then + write(*,*) 'Found environment variable "', trim(env_name), '" with value "', trim(env_value), '"' + read(env_value,'(I255)') VECSIZE_USED ! see https://gcc.gnu.org/onlinedocs/gfortran/ICHAR.html + write(*,*) 'VECSIZE_USED (from env) = ', VECSIZE_USED + else if( env_status.eq.1 ) then ! 1 = not defined + write(*,*) 'VECSIZE_USED (default) = ', VECSIZE_USED + else ! -1 = too long for env_value, 2 = not supported by O/S + write(*,*) 'ERROR! get_environment_variable failed for "', trim(env_name), '"' + STOP + endif + if( VECSIZE_USED.gt.VECSIZE_MEMMAX .or. VECSIZE_USED.le.0 ) then + write(*,*) 'ERROR! Invalid VECSIZE_USED = ', VECSIZE_USED + STOP + endif + +#ifdef MG5AMC_MEEXPORTER_CUDACPP + CALL FBRIDGECREATE(FBRIDGE_PBRIDGE, VECSIZE_USED, NEXTERNAL, 4) ! 
this must be at the beginning as it initialises the CUDA device + FBRIDGE_NCBYF1 = 0 + FBRIDGE_CBYF1SUM = 0 + FBRIDGE_CBYF1SUM2 = 0 + FBRIDGE_CBYF1MAX = -1D100 + FBRIDGE_CBYF1MIN = 1D100 +#endif +""" + + replace_dict['CUDACPP_EXTRA_FINALISE'] = """ +#ifdef MG5AMC_MEEXPORTER_CUDACPP + CALL FBRIDGEDELETE(FBRIDGE_PBRIDGE) ! this must be at the end as it shuts down the CUDA device + IF( FBRIDGE_MODE .LE. -1 ) THEN ! (BothQuiet=-1 or BothDebug=-2) + WRITE(*,'(a,f10.8,a,e8.2)') + & ' [MERATIOS] ME ratio CudaCpp/Fortran: MIN = ', + & FBRIDGE_CBYF1MIN + 1, ' = 1 - ', -FBRIDGE_CBYF1MIN + WRITE(*,'(a,f10.8,a,e8.2)') + & ' [MERATIOS] ME ratio CudaCpp/Fortran: MAX = ', + & FBRIDGE_CBYF1MAX + 1, ' = 1 + ', FBRIDGE_CBYF1MAX + WRITE(*,'(a,i6)') + & ' [MERATIOS] ME ratio CudaCpp/Fortran: NENTRIES = ', + & FBRIDGE_NCBYF1 +c WRITE(*,'(a,e8.2)') +c & ' [MERATIOS] ME ratio CudaCpp/Fortran - 1: AVG = ', +c & FBRIDGE_CBYF1SUM / FBRIDGE_NCBYF1 +c WRITE(*,'(a,e8.2)') +c & ' [MERATIOS] ME ratio CudaCpp/Fortran - 1: STD = ', +c & SQRT( FBRIDGE_CBYF1SUM2 / FBRIDGE_NCBYF1 ) ! ~standard deviation + WRITE(*,'(a,e8.2,a,e8.2)') + & ' [MERATIOS] ME ratio CudaCpp/Fortran - 1: AVG = ', + & FBRIDGE_CBYF1SUM / FBRIDGE_NCBYF1, ' +- ', + & SQRT( FBRIDGE_CBYF1SUM2 ) / FBRIDGE_NCBYF1 ! 
~standard error + ENDIF +#endif + CALL COUNTERS_FINALISE() +""" + + if writer: + text = open(pjoin(self._file_path,'iolibs','template_files','madevent_driver.f')).read() % replace_dict + writer.write(text) + return True + return replace_dict #------------------------------------------------------------------------------------ class GPU_ProcessExporter(PLUGIN_ProcessExporter_MadEvent): diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh deleted file mode 100755 index 6122cee227..0000000000 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/bash -# Copyright (C) 2020-2024 CERN and UCLouvain. -# Licensed under the GNU Lesser General Public License (version 3 or later). -# Created by: A. Valassi (Mar 2022) for the MG5aMC CUDACPP plugin. -# Further modified by: O. Mattelaer, A. Valassi (2022-2024) for the MG5aMC CUDACPP plugin. - -set -e # immediate exit on error - -status=0 - -scrdir=$(cd $(dirname $0); pwd) - -function usage() -{ - echo "ERROR! Unknown command '$0 $*'" - echo "Usage: $0 [--nopatch|--upstream]" - exit 1 -} - -# Patch level -###patchlevel=0 # [--upstream] out of the box codegen from upstream MG5AMC (do not even copy templates) -###patchlevel=1 # [--nopatch] modify upstream MG5AMC but do not apply patch commands (reference to prepare new patches) -patchlevel=2 # [DEFAULT] complete generation of cudacpp .sa/.mad (copy templates and apply patch commands) - -if [ "$2" == "" ]; then - usage $* -elif [ "$3" == "--nopatch" ]; then - if [ "$4" != "" ]; then usage; fi - patchlevel=1 -elif [ "$3" == "--upstream" ]; then - if [ "$4" != "" ]; then usage; fi - patchlevel=0 -elif [ "$3" != "" ]; then - usage $* -fi -dir=$1 -dir_patches=$2 -###echo "Current dir: $pwd" -###echo "Input dir to patch: $dir" - -if [ ! -e ${dir} ]; then echo "ERROR! 
Directory $dir does not exist"; exit 1; fi - -# Exit here for patchlevel 0 (--upstream) -if [ "${patchlevel}" == "0" ]; then exit $status; fi - -# Patch the default Fortran code to provide the integration with the cudacpp plugin -# (1) Process-independent patches -touch ${dir}/Events/.keep # this file should already be present (mg5amcnlo copies it from Template/LO/Events/.keep) -#\cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/fbridge_common.inc ${dir}/SubProcesses # new file -if [ "${patchlevel}" == "2" ]; then - cd ${dir} - echo "DEBUG: cd ${PWD}; patch -p4 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.common" - if ! patch -p4 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.common; then status=1; fi - \rm -f Source/*.orig - \rm -f bin/internal/*.orig - cd - > /dev/null -fi -for p1dir in ${dir}/SubProcesses/P*; do - cd $p1dir - #ln -sf ../fbridge_common.inc . # new file - #cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/counters.cc . # new file - #cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/ompnumthreads.cc . # new file - if [ "${patchlevel}" == "2" ]; then - echo "DEBUG: cd ${PWD}; patch -p6 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.P1" - if ! 
patch -p6 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.P1; then status=1; fi - fi - \rm -f *.orig - cd - > /dev/null -done - -# Patch the default Fortran code to provide the integration with the cudacpp plugin -# (2) Process-dependent patches -cd ${dir}/Source/MODEL > /dev/null -gcs=$(cat coupl_write.inc | awk '{if($1=="WRITE(*,2)") print $NF}') # different printouts for scalar/vector couplings #456 -for gc in $gcs; do - if grep -q "$gc(VECSIZE_MEMMAX)" coupl.inc; then - ###echo "DEBUG: Coupling $gc is a vector" - cat coupl_write.inc | awk -vgc=$gc '{if($1=="WRITE(*,2)" && $NF==gc) print $0"(1)"; else print $0}' > coupl_write.inc.new - \mv coupl_write.inc.new coupl_write.inc - ###else - ### echo "DEBUG: Coupling $gc is a scalar" - fi -done -cd - > /dev/null - -# Patch the default cudacpp code to fix a bug in coloramps -# ** NEW AUG 2023: DISABLING THE COLORAMPS PATCH FIXES THE LHE COLOR MISMATCH IN GG_TTGG (#655 and #713) ** -# (3) Process-dependent patches -#for p1dir in ${dir}/SubProcesses/P*; do -# cd $p1dir -# cat coloramps.h | awk -vp=1 '{if (p==1) print $0; if ($1=="__device__") p=0}' > coloramps.h.new -# cat coloramps.inc | sed 's|)/|)/ {|' | sed 's|/$|}, /|' \ -# | awk -vb= '{if($1~")/"){b=$2}; if($1=="$"){b=b$2}; if($3=="/"){print " "b}}' \ -# | sed 's/.TRUE./ true/g' | sed 's/.FALSE./ false/g' | sed 's/}/ }/' >> coloramps.h.new -# truncate -s -2 coloramps.h.new -# echo "">> coloramps.h.new -# cat coloramps.h | awk -vp=0 '{if ($1=="};") p=1; if (p==1) print $0}' >> coloramps.h.new -# \mv coloramps.h.new coloramps.h -#done - -exit $status