Skip to content

Commit 0632bf5

Browse files
Test H CI (PaddlePaddle#76321)
* Test H CI * Test H CI * Test H CI * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * test fa cache * test fa cache * test update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update * Update
1 parent 42b8146 commit 0632bf5

File tree

2 files changed

+543
-0
lines changed

2 files changed

+543
-0
lines changed

.github/workflows/H-Coverage.yml

Lines changed: 326 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,326 @@
1+
# Coverage pipeline for pull requests: clone -> build -> test.
name: CI-H-Coverage

on:
  pull_request:
    types:
      - opened
      - synchronize
    branches:
      - develop
      - release/**

permissions: read-all

# At most one live run per pull request and workflow; a newer push cancels
# the run that is still in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.event.pull_request.number }}-${{ github.workflow }}

# Workflow-wide variables. The jobs forward them into their containers by
# name with `docker run -e`.
env:
  # Pull-request identity.
  PR_ID: ${{ github.event.pull_request.number }}
  GIT_PR_ID: ${{ github.event.pull_request.number }}
  COMMIT_ID: ${{ github.event.pull_request.head.sha }}
  BRANCH: ${{ github.base_ref }}
  # Naming; TASK is the prefix of the container names created by the jobs.
  TASK: paddle-CI-${{ github.event.pull_request.number }}-coverage
  CI_name: h-coverage
  # Paths used by the scripts that run inside the containers.
  work_dir: /paddle
  PADDLE_ROOT: /paddle
  ci_scripts: /paddle/ci
  CFS_DIR: /home/data/cfs
  # Hosts that must be reached without going through the proxy.
  no_proxy: 'bcebos.com,apiin.im.baidu.com,gitee.com,aliyun.com,.baidu.com,.tuna.tsinghua.edu.cn'

defaults:
  run:
    shell: bash
30+
31+
jobs:
  # Delegates to the reusable clone workflow; the build job reads its
  # `can-skip` output.
  clone:
    name: Coverage clone
    uses: ./.github/workflows/_Clone-linux.yml
    with:
      clone_dir: Paddle-coverage
      workflow-name: coverage
38+
39+
# ------------------------------------------------------------------ job: build
  build:
    name: Coverage build
    needs: clone
    # Skipped entirely when the clone job reports that the run can be skipped.
    if: needs.clone.outputs.can-skip != 'true'
    runs-on:
      group: GZ_BD-CPU
    outputs:
      # Re-exports the result of the "Check bypass" step at job level.
      can-skip: ${{ steps.check-bypass.outputs.can-skip }}

    steps:
49+
# ---- build / step: start the build container
      - name: Check docker image and run container
        env:
          # Cache locations (paths inside the container).
          CACHE_DIR: "/root/.cache/coverage"
          CCACHE_DIR: "/root/.ccache/h-coverage"
          CCACHE_MAXSIZE: 200G
          CCACHE_LIMIT_MULTIPLE: "0.8"
          INFERENCE_DEMO_INSTALL_DIR: /root/.cache/coverage
          # Build configuration.
          WITH_GPU: "ON"
          CUDA_ARCH_NAME: Hopper
          WITH_AVX: "ON"
          WITH_DISTRIBUTE: "ON"
          WITH_UNITY_BUILD: "ON"
          WITH_FA_BUILD_WITH_CACHE: "ON"
          PY_VERSION: "3.10"
          PADDLE_VERSION: "0.0.0"
          LITE_GIT_TAG: develop
          # Runtime knobs.
          FLAGS_fraction_of_gpu_memory_to_use: "0.15"
          CTEST_PARALLEL_LEVEL: "2"
          # NOTE(review): set for this step only; it is not in the list of
          # variables forwarded into the container below.
          CUDA_VISIBLE_DEVICES: "0,1"
          # Credentials.
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Timestamped name, published through the env file so that the
          # following steps can address the container as env.container_name.
          container_name=${TASK}-build-$(date +%Y%m%d-%H%M%S)
          echo "container_name=${container_name}" >> ${{ github.env }}
          docker_image=ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda129-coverage-test

          # Variables forwarded by name: `-e NAME` copies the value from this
          # step's environment into the container.
          forwarded_vars=(
            CI_name BRANCH PR_ID COMMIT_ID work_dir PADDLE_ROOT GIT_PR_ID
            CACHE_DIR CCACHE_DIR ci_scripts
            FLAGS_fraction_of_gpu_memory_to_use CTEST_PARALLEL_LEVEL
            WITH_GPU CUDA_ARCH_NAME WITH_AVX PADDLE_VERSION WITH_DISTRIBUTE
            LITE_GIT_TAG WITH_UNITY_BUILD WITH_FA_BUILD_WITH_CACHE PY_VERSION
            INFERENCE_DEMO_INSTALL_DIR CCACHE_MAXSIZE CCACHE_LIMIT_MULTIPLE
            GITHUB_TOKEN GITHUB_API_TOKEN CFS_DIR no_proxy
          )
          env_flags=()
          for var_name in "${forwarded_vars[@]}"; do
            env_flags+=(-e "${var_name}")
          done

          # The workspace is mounted at /paddle, which is also the working
          # directory of every later `docker exec`.
          docker run -d -t --name ${container_name} \
            -v "/home/data/cfs:/home/data/cfs" \
            -v "/home/data/cfs/.cache:/root/.cache" \
            -v "/home/data/cfs/.ccache:/root/.ccache" \
            -v "/dev/shm:/dev/shm" \
            -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
            -v ${{ github.workspace }}:/paddle \
            "${env_flags[@]}" \
            -w /paddle --network host ${docker_image}
110+
111+
# ---- build / step: fetch the cloned sources and merge the base branch
      - name: Download paddle.tar.gz and update test branch
        # The working directory is wiped before errexit is switched on, so a
        # failing `rm` does not abort the step.
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          rm -rf * .[^.]*
          set -e
          echo "Downloading Paddle.tar.gz"
          wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/Paddle-coverage/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate
          echo "Extracting Paddle.tar.gz"
          tar -xf Paddle.tar.gz --strip-components=1
          rm Paddle.tar.gz
          git config --global --add safe.directory "*"
          git remote -v
          # Adding the remote is allowed to fail without aborting the step.
          git remote add upstream https://github.com/PaddlePaddle/Paddle.git || true
          git config pull.rebase false
          git checkout test
          echo "Pull upstream $BRANCH"
          source ${{ github.workspace }}/../../../proxy
          bash ci/git_pull.sh $BRANCH
          '
132+
133+
# ---- build / step: bypass check
      # Its `can-skip` output gates the remaining build steps and is
      # re-exported as the job output of the same name.
      - id: check-bypass
        name: Check bypass
        uses: ./.github/actions/check-bypass
        with:
          workflow-name: h-ci
          github-token: ${{ secrets.GITHUB_TOKEN }}
139+
140+
# ---- build / step: compile
      - name: Build
        if: steps.check-bypass.outputs.can-skip != 'true'
        # Script outline:
        #   1. Verify that a prebuilt flash-attention archive exists for the
        #      flashattn submodule revision; exit with code 7 otherwise.
        #   2. Enable errexit so that a failing `pip install` or `cmake` stops
        #      the step instead of falling through to `make`.
        #   3. Create the cache directories, including the one CCACHE_DIR
        #      points at (the start-container step sets it to
        #      /root/.ccache/h-coverage, which was never created before).
        #   4. Configure and build.
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          flashattn_version=$(git submodule status|grep flashattn|awk "{print \$1}"|sed "s#-##g")
          url="https://xly-devops.bj.bcebos.com/gpups/flash-attention/cu90/flashattn_libs_${flashattn_version}.tar"
          url_return=$(curl -s -o /dev/null -w "%{http_code}" "$url")
          if [ "$url_return" != "200" ];then
            echo "flashattn cache not found, please contact umiswing"
            exit 7
          fi
          # Enabled only after the probe above so that its friendly error
          # message is still printed when curl itself fails.
          set -e
          mkdir -p ${CFS_DIR}/.cache/coverage
          mkdir -p ${CFS_DIR}/.ccache/coverage
          mkdir -p "${CCACHE_DIR}"
          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12.9/compat
          source ${{ github.workspace }}/../../../proxy
          pip install -r python/requirements.txt
          mkdir build && cd build
          cmake .. \
            -DPY_VERSION=3.10 \
            -DWITH_GPU=ON \
            -DWITH_DISTRIBUTE=ON \
            -DWITH_TESTING=ON \
            -DCUDA_ARCH_NAME=Hopper \
            -DFA_JOB_POOLS_COMPILE=1 \
            -DWITH_CUDNN_FRONTEND=ON \
            -DON_INFER=OFF
          make -j20
          '
160+
161+
# ---- build / step: prune the build tree and pack it
      - name: Clean up env
        if: steps.check-bypass.outputs.can-skip != 'true'
        # Removes build intermediates, then archives /paddle as /Paddle.tar.gz
        # (zstd-compressed despite the .gz suffix) for the upload step.
        #
        # Changes from the previous version:
        #   * Static libraries and object files are removed with
        #     `find ... -exec rm -rf {} +`. The old `rm -rf $(find ...)` put
        #     every path on one command line, which can exceed the argument
        #     limit on a large build tree and splits paths on whitespace.
        #   * The setuptools build directories are matched with a glob. The
        #     hard-coded `-3.9` suffix could never match, because this
        #     workflow builds for Python 3.10.
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          source ~/.bashrc
          source ${ci_scripts}/utils.sh; clean_build_files
          find . -depth \( -name "*.a" -o -name "*.o" \) -exec rm -rf {} +
          rm -rf lib.linux-x86_64-*
          find ./ -name "eager_generator" -or -name "kernel_signature_generator" -or -name "eager_legacy_op_function_generator" | xargs rm -rf
          rm -rf ./python/build/lib.linux-x86_64-*/
          # In build/third_party keep dlpack, install, eigen3 and gflags
          # untouched; elsewhere delete every regular file that is not a
          # shared library.
          cd "${work_dir}/build/third_party" && find $(ls | grep -v "dlpack" | grep -v "install" | grep -v "eigen3" | grep -v "gflags") -type f ! -name "*.so" -a ! -name "libdnnl.so*" -delete
          cd /
          tar --use-compress-program="pzstd -1" -cf Paddle.tar.gz paddle
          '
176+
177+
# ---- build / step: publish the build tree and the wheel
      - name: Upload coverage product
        if: steps.check-bypass.outputs.can-skip != 'true'
        env:
          home_path: ${{ github.workspace }}/..
          bos_file: ${{ github.workspace }}/../bos_retry/BosClient.py
          paddle_whl: paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl
        # Uploads Paddle.tar.gz and the wheel to
        # paddle-github-action/PR/h-coverage/<PR_ID>/<COMMIT_ID>, from where the
        # test job downloads them.
        #
        # Changes from the previous version:
        #   * `set -e`: the script used to end with `echo "End Upload"`, so the
        #     step succeeded even when a download, copy or upload had failed.
        #   * `mkdir -p`: tolerates a bos_retry directory left behind without
        #     BosClient.py.
        #   * The wheel is copied via env.paddle_whl, the same name that is
        #     uploaded, instead of a second hard-coded copy of the file name.
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          set -e
          echo "::group::Install bce-python-sdk"
          python -m pip install bce-python-sdk==0.8.74
          echo "::endgroup::"
          export AK=paddle
          export SK=paddle
          # Fetch the upload client once; later runs reuse the unpacked copy.
          if [ ! -f "${{ env.bos_file }}" ]; then
            wget -q --no-proxy -O ${{ env.home_path }}/bos_retry.tar.gz https://xly-devops.bj.bcebos.com/home/bos_retry.tar.gz --no-check-certificate
            mkdir -p ${{ env.home_path }}/bos_retry
            tar xf ${{ env.home_path }}/bos_retry.tar.gz -C ${{ env.home_path }}/bos_retry
          fi
          cd /paddle
          mv /Paddle.tar.gz .
          cp ./build/python/dist/${{ env.paddle_whl }} .
          echo "Uploading Paddle.tar.gz"
          python ${{ env.bos_file }} Paddle.tar.gz paddle-github-action/PR/h-coverage/${{ env.PR_ID }}/${{ env.COMMIT_ID }}
          echo "Uploading coverage wheel"
          python ${{ env.bos_file }} ${{ env.paddle_whl }} paddle-github-action/PR/h-coverage/${{ env.PR_ID }}/${{ env.COMMIT_ID }}
          echo "End Upload"
          '
204+
205+
# ---- build / step: tear down the build container
      - name: Terminate and delete the container
        # Always run. The container is started by the first step of this job,
        # before the bypass check, so it exists whether or not the build was
        # bypassed. The previous condition also required can-skip != 'true',
        # which left the container running on every bypassed run.
        if: always()
        run: |
          # Best effort: keep going even if an individual command fails.
          set +e
          docker exec -t ${{ env.container_name }} /bin/bash -c 'rm -rf * .[^.]*'
          docker stop ${{ env.container_name }}
          docker rm ${{ env.container_name }}
212+
213+
# ------------------------------------------------------------------- job: test
  test:
    name: Coverage test
    needs: [build]
    # A bypassed build skips its upload step, so there is no archive for this
    # job to download. Without this condition the job would start anyway and
    # fail in its download step. The implicit success() check still applies,
    # so a failed or skipped build keeps this job from running as before.
    if: needs.build.outputs.can-skip != 'true'
    runs-on:
      group: H-Coverage
    steps:
219+
# ---- test / step: pick the runner
      - name: Determine the runner
        # The runner name is the third "/"-separated field of the working
        # directory. `determine_gpu_runner` is defined by the downloaded
        # utils.sh; presumably it publishes GPU_DEVICES for the next step -
        # TODO confirm against that script.
        run: |
          runner_name=$(echo $PWD | awk -F '/' '{print $3}')
          echo ${runner_name}
          wget -q https://xly-devops.bj.bcebos.com/utils.sh
          source utils.sh
          determine_gpu_runner ${runner_name}
226+
227+
# ---- test / step: start the test container
      - name: Check docker image and run container
        env:
          # Cache locations (paths inside the container).
          CACHE_DIR: "/root/.cache/coverage"
          CCACHE_DIR: "/root/.ccache/coverage"
          CCACHE_MAXSIZE: 200G
          CCACHE_LIMIT_MULTIPLE: "0.8"
          INFERENCE_DEMO_INSTALL_DIR: /root/.cache/coverage
          # Build/test configuration.
          WITH_GPU: "ON"
          CUDA_ARCH_NAME: Hopper
          WITH_AVX: "ON"
          WITH_DISTRIBUTE: "ON"
          WITH_UNITY_BUILD: "ON"
          WITH_SHARED_PHI: "ON"
          WITH_CINN: "ON"
          ON_INFER: "ON"
          PY_VERSION: "3.10"
          PADDLE_VERSION: "0.0.0"
          COVERALLS_UPLOAD: "ON"
          COVERAGE_FILE: ${{ github.workspace }}/build/python-coverage.data
          # Runtime knobs.
          FLAGS_fraction_of_gpu_memory_to_use: "0.15"
          FLAGS_PIR_OPTEST: "TRUE"
          CTEST_PARALLEL_LEVEL: "2"
          # Only used for the --gpus option below; not forwarded with -e.
          GPU_DEVICES: ${{ env.GPU_DEVICES }}
          # Credentials.
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Timestamped name, published through the env file so that the
          # following steps can address the container as env.container_name.
          container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
          echo "container_name=${container_name}" >> ${{ github.env }}
          docker_image=ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda129-coverage-test

          # Variables forwarded by name: `-e NAME` copies the value from this
          # step's environment into the container.
          # NOTE(review): WITH_COVERAGE is forwarded but not defined in this
          # file - confirm whether it is expected to come from elsewhere.
          forwarded_vars=(
            CI_name BRANCH PR_ID COMMIT_ID work_dir PADDLE_ROOT GIT_PR_ID
            CACHE_DIR CCACHE_DIR ci_scripts
            FLAGS_fraction_of_gpu_memory_to_use CTEST_PARALLEL_LEVEL
            WITH_GPU CUDA_ARCH_NAME WITH_AVX WITH_COVERAGE COVERALLS_UPLOAD
            PADDLE_VERSION WITH_DISTRIBUTE WITH_UNITY_BUILD PY_VERSION
            WITH_SHARED_PHI WITH_CINN INFERENCE_DEMO_INSTALL_DIR
            CCACHE_MAXSIZE CCACHE_LIMIT_MULTIPLE FLAGS_PIR_OPTEST ON_INFER
            COVERAGE_FILE GITHUB_TOKEN GITHUB_API_TOKEN CFS_DIR no_proxy
          )
          env_flags=()
          for var_name in "${forwarded_vars[@]}"; do
            env_flags+=(-e "${var_name}")
          done

          # The workspace is mounted at /paddle, which is also the working
          # directory of every later `docker exec`.
          docker run -d -t --gpus "\"device=${GPU_DEVICES}\"" --name ${container_name} \
            -v "/home/data/cfs:/home/data/cfs" \
            -v "/home/data/cfs/.cache:/root/.cache" \
            -v "/home/data/cfs/.ccache:/root/.ccache" \
            -v "/dev/shm:/dev/shm" \
            -v ${{ github.workspace }}/../../..:${{ github.workspace }}/../../.. \
            -v ${{ github.workspace }}:/paddle \
            "${env_flags[@]}" \
            -w /paddle --network host ${docker_image}
297+
298+
# ---- test / step: fetch the build tree produced by the build job
      - name: Download paddle.tar.gz and update test branch
        # The archive is the zstd-compressed tree uploaded by the build job,
        # hence the pzstd decompressor despite the .gz suffix. The working
        # directory is wiped before errexit is switched on.
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          rm -rf * .[^.]*
          set -e
          echo "Downloading Paddle.tar.gz from cfs"
          wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PR/h-coverage/${PR_ID}/${COMMIT_ID}/Paddle.tar.gz --no-check-certificate
          echo "Extracting Paddle.tar.gz"
          tar --use-compress-program="pzstd -1" -xf Paddle.tar.gz --strip-components=1
          rm Paddle.tar.gz
          '
309+
310+
# ---- test / step: install the wheel and run the tests
      - name: Test
        # `set -e` makes a failed wheel or requirements install stop the step.
        # Previously such a failure was ignored and h-test.sh ran against
        # whatever happened to be installed in the image. The stray double
        # slash in the wheel path is removed as well.
        run: |
          docker exec -t ${{ env.container_name }} /bin/bash -c '
          set -e
          source ${{ github.workspace }}/../../../proxy
          pip install build/python/dist/*.whl --no-deps
          pip install -r python/unittest_py/requirements.txt
          bash $ci_scripts/h-test.sh
          '
318+
319+
# ---- test / step: tear down the test container
      - name: Terminate and delete the container
        if: always()
        run: |
          # Best effort: keep going even if an individual command fails.
          set +e
          # Remove a leftover archive from the workspace on the runner.
          rm Paddle.tar.gz
          # Empty the mounted workspace from inside the container, then stop
          # and remove the container.
          docker exec -t ${{ env.container_name }} /bin/bash -c 'rm -rf * .[^.]*'
          docker stop ${{ env.container_name }}
          docker rm ${{ env.container_name }}

0 commit comments

Comments
 (0)