9 changes: 9 additions & 0 deletions language/llama2-70b/SUT.py
@@ -275,6 +275,15 @@ def load_model(self):
)
print("Loaded model")

if torch.cuda.is_available():
    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        print(f"Using {num_gpus} GPUs via DataParallel")
        self.model = torch.nn.DataParallel(self.model)
    self.model.to("cuda")
else:
    self.model.to(self.device)

self.device = torch.device(self.device)
if self.device == "cpu":
self.model = self.model.to(
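A quick sanity check to pair with the DataParallel path added above (a hedged sketch, not part of this change): confirm how many GPUs PyTorch actually sees in the target environment, since the wrapper only engages when more than one device is visible.

# Sketch only: the DataParallel branch above activates only when the GPU
# count printed here is greater than 1.
python3 -c "import torch; print('cuda available:', torch.cuda.is_available()); print('visible gpus:', torch.cuda.device_count())"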
38 changes: 38 additions & 0 deletions language/llama2-70b/app_launch.sh
@@ -0,0 +1,38 @@
#!/bin/bash

MLCOMMONS_REPO_PATH="$(dirname "$(dirname "$PWD")")"

# Add any volume mounts here with the following syntax
# /path/to/src:/path/to/dir/in/container
MOUNTS=(
$MLCOMMONS_REPO_PATH:$MLCOMMONS_REPO_PATH
/share:/share
)

# Set up docker environment file for current user
CI_BUILD_USER=$(id -u -n)
CI_BUILD_UID=$(id -u)
CI_BUILD_GROUP=$(id -g -n)
CI_BUILD_GID=$(id -g)


# Build container
apptainer build llm_gpubringup.sif llm_gpubringup.def

# Build mount flags
declare -a MOUNT_FLAGS
for _mount in "${MOUNTS[@]}"; do
_split=($(echo $_mount | tr ':' '\n'))
MOUNT_FLAGS+=("--bind" "${_split[0]}:${_split[1]}")
done

set -x
sudo apptainer exec --nv --ipc --writable-tmpfs \
--pwd $PWD \
"${MOUNT_FLAGS[@]}" \
--env CI_BUILD_USER=$CI_BUILD_USER \
--env CI_BUILD_UID=$CI_BUILD_UID \
--env CI_BUILD_GROUP=$CI_BUILD_GROUP \
--env CI_BUILD_GID=$CI_BUILD_GID \
llm_gpubringup.sif \
bash ./with_the_same_user
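For anyone extending the MOUNTS list, a hedged dry-run sketch (not part of this change) that prints the --bind flags the loop above would generate, without building or launching the container:

# Sketch only: assumes the same colon-separated src:dst entries as MOUNTS above.
for _mount in "${MOUNTS[@]}"; do
    _split=($(echo "$_mount" | tr ':' '\n'))
    echo "--bind ${_split[0]}:${_split[1]}"
done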
4 changes: 2 additions & 2 deletions language/llama2-70b/build.sh
@@ -1,7 +1,7 @@
set -e

conda install pybind11==2.10.4 -c conda-forge -y
conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch-nightly -c nvidia
conda install pybind11==2.10.4 -y
conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch-nightly
python -m pip install transformers==4.31.0 nltk==3.8.1 evaluate==0.4.0 absl-py==1.4.0 rouge-score==0.1.2 sentencepiece==0.1.99 accelerate==0.21.0


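A hedged post-install check (a sketch assuming build.sh ran inside the active conda environment): verify that the nightly PyTorch actually came with CUDA 11.8 support, since the install line above no longer pulls from the nvidia channel.

# Sketch only: prints the torch version, the CUDA version it was built against,
# and whether a GPU is reachable.
python3 -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"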
35 changes: 35 additions & 0 deletions language/llama2-70b/exec_app.sh
@@ -0,0 +1,35 @@
#!/bin/bash

MLCOMMONS_REPO_PATH="$(dirname "$(dirname "$PWD")")"

# Add any volume mounts here with the following syntax
# /path/to/src:/path/to/dir/in/container
MOUNTS=(
$MLCOMMONS_REPO_PATH:$MLCOMMONS_REPO_PATH
/share:/share
/usr/bin/srun:/usr/bin/srun
/usr/bin/sinfo:/usr/bin/sinfo
/share/software/spack/opt/spack/linux-rocky8-zen/gcc-8.5.0/slurm-23-11-1-1-yh4vs4sr7xks2nbzffs2hdwe7pqfovsg:/opt/slurm
/var/spool/slurm/d/conf-cache:/var/spool/slurm/d/conf-cache
)

CI_BUILD_USER=$(id -u -n)
CI_BUILD_UID=$(id -u)
CI_BUILD_GROUP=$(id -g -n)
CI_BUILD_GID=$(id -g)

# Build mount flags
declare -a MOUNT_FLAGS
for _mount in "${MOUNTS[@]}"; do
_split=($(echo $_mount | tr ':' '\n'))
MOUNT_FLAGS+=("--bind" "${_split[0]}:${_split[1]}")
done

set -x
apptainer exec --nv --ipc --writable-tmpfs \
--pwd $PWD \
"${MOUNT_FLAGS[@]}" \
--env CI_BUILD_USER=$CI_BUILD_USER \
--env CI_BUILD_UID=$CI_BUILD_UID \
--env CI_BUILD_GROUP=$CI_BUILD_GROUP \
--env CI_BUILD_GID=$CI_BUILD_GID \
llm_gpubringup.sif \
bash ./with_the_same_user
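Because this launcher bind-mounts the host's srun and sinfo binaries, the Slurm install tree, and the config cache, a hedged in-container check (a sketch, not part of this change) is to confirm the cluster is still reachable once inside:

# Sketch only: run from a shell inside the container started by exec_app.sh;
# both commands depend on the Slurm bind mounts listed above.
sinfo --version
srun --ntasks=1 hostname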
48 changes: 48 additions & 0 deletions language/llama2-70b/llm_gpubringup.def
@@ -0,0 +1,48 @@
Bootstrap: docker
From: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04

%environment
export LC_ALL=C.UTF-8
export LANG=C.UTF-8
export TZ=US/Pacific
export DEBIAN_FRONTEND=noninteractive
export PATH=$PATH:/opt/miniconda3/bin

%post
# Use bash for this build stage; values from %environment are not visible in
# %post, so set TZ and a non-interactive apt frontend explicitly here.
SHELL=/bin/bash
export DEBIAN_FRONTEND=noninteractive
export TZ=US/Pacific

echo "Setting timezone..."
ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

echo "Cleaning apt lists and sources..."
rm -rf /var/lib/apt/lists/* && rm -f /etc/apt/sources.list.d/*

echo "Updating apt and installing base packages..."
apt-get update && apt-get install -y --no-install-recommends \
build-essential autoconf libtool git ccache curl wget pkg-config \
sudo ca-certificates automake libssl-dev bc python3-dev python3-pip \
google-perftools gdb libglib2.0-dev clang sshfs libre2-dev libboost-dev \
libnuma-dev numactl sysstat sshpass ntpdate less iputils-ping rsync \
pkg-config zip g++ zlib1g-dev unzip libarchive-dev

# Remove unneeded packages
apt-get -y autoremove
apt-get remove -y cmake

echo "Upgrading pip and setuptools..."
python3 -m pip install --upgrade pip setuptools wheel virtualenv

echo "Installing Miniconda..."
cd /tmp
wget https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-Linux-x86_64.sh
bash Miniconda3-py310_23.5.2-0-Linux-x86_64.sh -b -p /opt/miniconda3
chmod -R 777 /opt/miniconda3

echo "Creating conda environment llama2-70b..."
/opt/miniconda3/bin/conda create -y -n llama2-70b python=3.10

%runscript
echo "Container built successfully. Use '--nv' for GPU support."
exec "$@"
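A hedged usage sketch for the image built from this definition (not part of this change): confirm GPU passthrough and the pre-created conda environment.

# Sketch only: assumes llm_gpubringup.sif was already built, e.g. by app_launch.sh.
apptainer exec --nv llm_gpubringup.sif nvidia-smi
apptainer exec --nv llm_gpubringup.sif /opt/miniconda3/bin/conda env list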

11 changes: 11 additions & 0 deletions language/llama2-70b/performance_benchmark.sh
@@ -0,0 +1,11 @@
python3 -u main.py --scenario Offline --vllm \
--model-path /share/mlperf_sets/model/llama-2-70b-chat-hf.uri \
--user-conf user.conf \
--num-workers 4 \
--total-sample-count 24576 \
--dataset-path /share/mlperf_sets/data/validation/llama-2-70b-open-orca-dataset.uri/open_orca_gpt4_tokenized_llama.sampled_24576.pkl \
--output-log-dir offline-logs \
--dtype float32 \
--api-server http://127.0.0.1:8000 \
--api-model-name /share/mlperf_sets/model/llama-2-70b-chat-hf.uri \
--device cuda:0 2>&1 | tee offline_performance_log.log
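This benchmark points --api-server at the vLLM endpoint from vllm.sh, so the server must already be serving on 127.0.0.1:8000. A hedged readiness check (a sketch, not part of this change) before kicking off the Offline run:

# Sketch only: the OpenAI-compatible server lists the served model once the
# 70B weights have finished loading.
curl -s http://127.0.0.1:8000/v1/models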
10 changes: 6 additions & 4 deletions language/llama2-70b/run_accuracy.sh
@@ -1,17 +1,19 @@
CHECKPOINT_PATH="${CHECKPOINT_PATH:-meta-llama/Llama-2-70b-chat-hf}"
DATASET_PATH="${DATASET_PATH:-open-orca-val-set.pkl}"
CHECKPOINT_PATH=/share/mlperf_sets/model/llama-2-70b-chat-hf.uri
DATASET_PATH=/share/mlperf_sets/data/validation/llama-2-70b-open-orca-dataset.uri/open_orca_gpt4_tokenized_llama.sampled_24576.pkl

mkdir -p "run_outputs"

python3 -u main.py --scenario Offline \
python3 -u main.py --scenario Offline --vllm \
--model-path ${CHECKPOINT_PATH} \
--accuracy \
--mlperf-conf mlperf.conf \
--user-conf user.conf \
--total-sample-count 24576 \
--dataset-path ${DATASET_PATH} \
--num-workers 4 \
--output-log-dir offline_accuracy_loadgen_logs \
--dtype float32 \
--api-server http://127.0.0.1:8000 \
--api-model-name ${CHECKPOINT_PATH} \
--device cuda:0 2>&1 | tee offline_accuracy_log.log

python3 evaluate-accuracy.py --checkpoint-path ${CHECKPOINT_PATH} \
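A hedged end-to-end ordering sketch (not part of this change): the accuracy run above talks to the vLLM server from vllm.sh, so the server has to be up and finished loading weights first. The wait loop below is an assumption, not something this change provides.

# Sketch only: start the server, wait until the API answers, then run accuracy.
bash vllm.sh &
until curl -s http://127.0.0.1:8000/v1/models > /dev/null; do sleep 30; done
bash run_accuracy.sh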
3 changes: 3 additions & 0 deletions language/llama2-70b/vllm.sh
@@ -0,0 +1,3 @@
CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m vllm.entrypoints.openai.api_server \
--model /share/mlperf_sets/model/llama-2-70b-chat-hf.uri \
--tensor-parallel-size 4
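Once the server is up, a hedged smoke-test request against the OpenAI-compatible endpoint (a sketch, not part of this change; the prompt and token count are arbitrary):

# Sketch only: the model field must match the path passed to the API server above.
curl -s http://127.0.0.1:8000/v1/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "/share/mlperf_sets/model/llama-2-70b-chat-hf.uri", "prompt": "The capital of France is", "max_tokens": 16}'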