diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 39c7266..7e91b17 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -56,7 +56,7 @@ jobs: - uses: actions/checkout@v2 - name: Clone oneflow run: | - git clone https://github.com/Oneflow-Inc/oneflow --depth=1 + git clone https://github.com/Oneflow-Inc/oneflow --depth=1 --branch for_serving_test - name: Set environment variables run: | set -x @@ -144,17 +144,17 @@ jobs: docker run $extra_docker_args ${{ env.image_tag }} sleep 3600 docker exec -w $(pwd) ${{ env.container_name }} pip3 install -r ./ci/test/requirement.txt -i https://pypi.tuna.tsinghua.edu.cn/simple docker exec -w $(pwd) ${{ env.container_name }} bash ./ci/test/run_tests.sh - - name: Login to ACR with the AccessKey pair - uses: aliyun/acr-login@v1 - with: - login-server: https://registry.${{ env.REGION_ID }}.aliyuncs.com - username: "${{ secrets.ACR_USERNAME }}" - password: "${{ secrets.ACR_PASSWORD }}" - - name: Docker push to ACR - if: github.event_name == 'release' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' - run: | - docker tag ${{ env.image_tag }} ${{ env.acr_image_tag }} - docker push ${{ env.acr_image_tag }} + # - name: Login to ACR with the AccessKey pair + # uses: aliyun/acr-login@v1 + # with: + # login-server: https://registry.${{ env.REGION_ID }}.aliyuncs.com + # username: "${{ secrets.ACR_USERNAME }}" + # password: "${{ secrets.ACR_PASSWORD }}" + # - name: Docker push to ACR + # if: github.event_name == 'release' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' + # run: | + # docker tag ${{ env.image_tag }} ${{ env.acr_image_tag }} + # docker push ${{ env.acr_image_tag }} - name: Remove container run: docker container rm -f ${{ env.container_name }} - name: Remove image @@ -174,43 +174,43 @@ jobs: echo "::set-output name=image_tag::${{ env.image_tag }}" echo "::set-output name=acr_image_tag::${{ env.acr_image_tag }}" - docker_push: - name: Push to docker hub - runs-on: ubuntu-latest - needs: [build_test] - if: github.event_name == 'release'|| github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' - steps: - - name: Set environment variables - run: | - echo "image_tag=oneflowinc/${{ needs.build_test.outputs.image_tag }}" >> $GITHUB_ENV - echo "acr_image_tag=${{ needs.build_test.outputs.acr_image_tag }}" >> $GITHUB_ENV - - name: Output environment variables - run: | - echo ${{ env.acr_image_tag }} - echo ${{ env.image_tag }} - - name: Login to ACR with the AccessKey pair - uses: aliyun/acr-login@v1 - with: - login-server: https://registry.${{ env.REGION_ID }}.aliyuncs.com - username: "${{ secrets.ACR_USERNAME }}" - password: "${{ secrets.ACR_PASSWORD }}" - - name: Login to docker hub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Pull, tag and push - run: | - docker pull ${{ env.acr_image_tag }} - docker tag ${{ env.acr_image_tag }} ${{ env.image_tag }} - docker push ${{ env.image_tag }} - - name: Pull, tag and push - run: | - docker pull ${{ env.acr_image_tag }} - docker tag ${{ env.acr_image_tag }} ${{ env.image_tag }} - docker push ${{ env.image_tag }} - - name: Push latest - if: github.event_name == 'release' || github.event_name == 'workflow_dispatch' - run: | - docker tag ${{ env.acr_image_tag }} oneflowinc/${{ env.SERVING_IMAGE }}:latest - docker push oneflowinc/${{ env.SERVING_IMAGE }}:latest + # docker_push: + # name: Push to docker hub + # runs-on: ubuntu-latest + # needs: [build_test] + # if: github.event_name == 'release'|| github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' + # steps: + # - name: Set environment variables + # run: | + # echo "image_tag=oneflowinc/${{ needs.build_test.outputs.image_tag }}" >> $GITHUB_ENV + # echo "acr_image_tag=${{ needs.build_test.outputs.acr_image_tag }}" >> $GITHUB_ENV + # - name: Output environment variables + # run: | + # echo ${{ env.acr_image_tag }} + # echo ${{ env.image_tag }} + # - name: Login to ACR with the AccessKey pair + # uses: aliyun/acr-login@v1 + # with: + # login-server: https://registry.${{ env.REGION_ID }}.aliyuncs.com + # username: "${{ secrets.ACR_USERNAME }}" + # password: "${{ secrets.ACR_PASSWORD }}" + # - name: Login to docker hub + # uses: docker/login-action@v1 + # with: + # username: ${{ secrets.DOCKERHUB_USERNAME }} + # password: ${{ secrets.DOCKERHUB_TOKEN }} + # - name: Pull, tag and push + # run: | + # docker pull ${{ env.acr_image_tag }} + # docker tag ${{ env.acr_image_tag }} ${{ env.image_tag }} + # docker push ${{ env.image_tag }} + # - name: Pull, tag and push + # run: | + # docker pull ${{ env.acr_image_tag }} + # docker tag ${{ env.acr_image_tag }} ${{ env.image_tag }} + # docker push ${{ env.image_tag }} + # - name: Push latest + # if: github.event_name == 'release' || github.event_name == 'workflow_dispatch' + # run: | + # docker tag ${{ env.acr_image_tag }} oneflowinc/${{ env.SERVING_IMAGE }}:latest + # docker push oneflowinc/${{ env.SERVING_IMAGE }}:latest diff --git a/ci/build/backend.sh b/ci/build/backend.sh index 23125bb..f38527b 100644 --- a/ci/build/backend.sh +++ b/ci/build/backend.sh @@ -2,8 +2,8 @@ set -euxo pipefail # build oneflow-backend -git config --global http.proxy ${HTTP_PROXY} -git config --global https.proxy ${HTTP_PROXY} +# git config --global http.proxy ${HTTP_PROXY} +# git config --global https.proxy ${HTTP_PROXY} mkdir -p build cd build @@ -11,5 +11,7 @@ cmake -DCMAKE_PREFIX_PATH=$ONEFLOW_CI_BUILD_DIR/liboneflow_cpp/share \ -DTRITON_RELATED_REPO_TAG=r$TRITON_VERSION \ -DTRITON_ENABLE_GPU=ON \ -DTHIRD_PARTY_MIRROR=aliyun \ + -DBUILD_ONEFLOW_BACKEND=ON \ + -DBUILD_ONEFLOW_LITE_BACKEND=OFF \ -G Ninja .. ninja -j8 diff --git a/ci/build/oneflow-serving.py b/ci/build/oneflow-serving.py index 3a54dcb..59cfca6 100755 --- a/ci/build/oneflow-serving.py +++ b/ci/build/oneflow-serving.py @@ -102,7 +102,8 @@ def __init__(self) -> None: def prepare(self): self._parse() - self._unknown.extend(['--disable-auto-complete-config']) + # self._unknown.extend(['--disable-auto-complete-config']) + self._unknown.extend(['--strict-model-config', 'false']) self._unknown_split = [] for argument in self._unknown: self._unknown_split.extend(argument.split('=')) diff --git a/ci/build/oneflow.sh b/ci/build/oneflow.sh index 87a5513..1baaa56 100644 --- a/ci/build/oneflow.sh +++ b/ci/build/oneflow.sh @@ -30,9 +30,10 @@ else if [ "$oneflow_head_built" != "$oneflow_head" ]; then build_oneflow else - cached_whl=$(ls $WHEELHOUSE_DIR) - python3 -m pip install $WHEELHOUSE_DIR/$cached_whl - > $export_pythonpath_script - echo "Use build cache for oneflow." + # cached_whl=$(ls $WHEELHOUSE_DIR) + # python3 -m pip install $WHEELHOUSE_DIR/$cached_whl + # > $export_pythonpath_script + # echo "Use build cache for oneflow." + build_oneflow fi fi diff --git a/ci/test/requirement.txt b/ci/test/requirement.txt index 0e915d9..465abad 100644 --- a/ci/test/requirement.txt +++ b/ci/test/requirement.txt @@ -1,2 +1,2 @@ -tritonclient[all] +tritonclient[all]==2.26.0 Jinja2 \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile index 8bf3b9d..dd16cbc 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -42,10 +42,15 @@ RUN sed -i 's/archive.ubuntu.com/mirrors.ustc.edu.cn/g' /etc/apt/sources.list && pip3 install cmake ENV DCGM_VERSION 2.2.9 +# # Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads +# RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \ +# mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \ +# apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub && \ +# add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" && \ +# apt-get update && apt-get install -y datacenter-gpu-manager=1:2.2.9 # Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads -RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \ - mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub && \ +RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb && \ + dpkg -i cuda-keyring_1.0-1_all.deb && \ add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" && \ apt-get update && apt-get install -y datacenter-gpu-manager=1:2.2.9 diff --git a/test/test_lite/test.sh b/test/test_lite/test.sh new file mode 100755 index 0000000..9d9355c --- /dev/null +++ b/test/test_lite/test.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +set -euxo pipefail + +export CUDA_VISIBLE_DEVICES=0 + +rm -rf ./models +mkdir -p models/resnet50/1 +cp -r ../common/model models/resnet50/1/ + +# generate minimal config.pbtxt +echo "name: \"resnet50\"" >> models/resnet50/config.pbtxt +echo "backend: \"oneflow\"" >> models/resnet50/config.pbtxt + +SERVER=/opt/tritonserver/bin/tritonserver +SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1 --strict-model-config false" +SERVER_LOG="./inference_server.log" +source ../common/util.sh + +run_server +if [ "$SERVER_PID" == "0" ]; then + echo -e "\n***\n*** Failed to start $SERVER\n***" + cat $SERVER_LOG + exit 1 +fi + +echo "running resnet50 basic test with oneflow lite backend" +python3 ../common/test_model.py --model resnet50 --target-output ../common/resnet50_output.npy + +kill $SERVER_PID +wait $SERVER_PID + +exit 0 diff --git a/test/test_resnet50/test.sh b/test/test_resnet50/test.sh index 0c2f759..92dfcdd 100755 --- a/test/test_resnet50/test.sh +++ b/test/test_resnet50/test.sh @@ -28,9 +28,8 @@ fi echo "running resnet50 basic test" python3 ../common/test_model.py --model resnet50 --target-output ../common/resnet50_output.npy -echo "running resnet50 batching test" -python3 ../common/test_model.py --model resnet50_batching --target-output ../common/resnet50_output.npy - +# echo "running resnet50 batching test" +# python3 ../common/test_model.py --model resnet50_batching --target-output ../common/resnet50_output.npy kill $SERVER_PID wait $SERVER_PID