
Commit 2dc5fe0

Merge branch 'master' of https://github.com/ggerganov/llama.cpp into adding_libmtmd_to_xcframework
2 parents: a9ddab4 + 557515b

File tree: 693 files changed, +98175 / -46664 lines


.devops/cann.Dockerfile

Lines changed: 5 additions & 4 deletions
@@ -4,7 +4,7 @@
 
 # Define the CANN base image for easier version updates later
 ARG CHIP_TYPE=910b
-ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc1.alpha001-${CHIP_TYPE}-openeuler22.03-py3.11
+ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc2-${CHIP_TYPE}-openeuler24.03-py3.11
 
 # ==============================================================================
 # BUILD STAGE
@@ -13,7 +13,7 @@ ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc1.alpha001-${CHIP_TYPE}-openeuler2
 FROM ${CANN_BASE_IMAGE} AS build
 
 # -- Install build dependencies --
-RUN yum install -y gcc g++ cmake make git libcurl-devel python3 python3-pip && \
+RUN yum install -y gcc g++ cmake make git openssl-devel python3 python3-pip && \
     yum clean all && \
     rm -rf /var/cache/yum
 
@@ -42,6 +42,7 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
         -DGGML_CANN=ON \
         -DCMAKE_BUILD_TYPE=Release \
         -DSOC_TYPE=ascend${CHIP_TYPE} \
+        -DUSE_ACL_GRAPH=ON \
         . && \
     cmake --build build --config Release -j$(nproc)
 
@@ -107,11 +108,11 @@ ENTRYPOINT ["/app/tools.sh"]
 # ENTRYPOINT ["/app/llama-server"]
 
 ### Target: light
-# Lightweight image containing only llama-cli
+# Lightweight image containing only llama-cli and llama-completion
 # ==============================================================================
 FROM base AS light
 
-COPY --from=build /app/full/llama-cli /app
+COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
 
 ENTRYPOINT [ "/app/llama-cli" ]
 
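For orientation, a minimal build sketch showing how the CHIP_TYPE argument and the light target compose (the image tag is an assumption, not part of this commit; 910b is the file's default):

docker build -f .devops/cann.Dockerfile \
    --build-arg CHIP_TYPE=910b \
    --target light \
    -t llama-cpp-cann:light .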

.devops/cpu.Dockerfile

Lines changed: 2 additions & 2 deletions
@@ -5,7 +5,7 @@ FROM ubuntu:$UBUNTU_VERSION AS build
 ARG TARGETARCH
 
 RUN apt-get update && \
-    apt-get install -y build-essential git cmake libcurl4-openssl-dev
+    apt-get install -y build-essential git cmake libssl-dev
 
 WORKDIR /app
 
@@ -68,7 +68,7 @@ ENTRYPOINT ["/app/tools.sh"]
 ### Light, CLI only
 FROM base AS light
 
-COPY --from=build /app/full/llama-cli /app
+COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
 
 WORKDIR /app
 
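Since the light stage now ships llama-completion alongside llama-cli, a quick smoke test is to override the entrypoint. A hedged sketch (the image tag, model path, and llama-completion's flags are assumptions; check --help in your build):

docker build -f .devops/cpu.Dockerfile --target light -t llama-cpp-cpu:light .
docker run --rm -v "$PWD/models:/models" \
    --entrypoint /app/llama-completion \
    llama-cpp-cpu:light -m /models/model.gguf -p "Hello" -n 32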

.devops/cuda-new.Dockerfile

Lines changed: 95 additions & 0 deletions
@@ -0,0 +1,95 @@
+ARG UBUNTU_VERSION=24.04
+# This needs to generally match the container host's environment.
+ARG CUDA_VERSION=13.1.0
+# Target the CUDA build image
+ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+
+ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_CUDA_DEV_CONTAINER} AS build
+
+# CUDA architecture to build for (defaults to all supported archs)
+ARG CUDA_DOCKER_ARCH=default
+
+RUN apt-get update && \
+    apt-get install -y build-essential cmake python3 python3-pip git libssl-dev libgomp1
+
+WORKDIR /app
+
+COPY . .
+
+RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
+        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
+    fi && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release -j$(nproc)
+
+RUN mkdir -p /app/lib && \
+    find build -name "*.so*" -exec cp -P {} /app/lib \;
+
+RUN mkdir -p /app/full \
+    && cp build/bin/* /app/full \
+    && cp *.py /app/full \
+    && cp -r gguf-py /app/full \
+    && cp -r requirements /app/full \
+    && cp requirements.txt /app/full \
+    && cp .devops/tools.sh /app/full/tools.sh
+
+## Base image
+FROM ${BASE_CUDA_RUN_CONTAINER} AS base
+
+RUN apt-get update \
+    && apt-get install -y libgomp1 curl\
+    && apt autoremove -y \
+    && apt clean -y \
+    && rm -rf /tmp/* /var/tmp/* \
+    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+    && find /var/cache -type f -delete
+
+COPY --from=build /app/lib/ /app
+
+### Full
+FROM base AS full
+
+COPY --from=build /app/full /app
+
+WORKDIR /app
+
+RUN apt-get update \
+    && apt-get install -y \
+    git \
+    python3 \
+    python3-pip \
+    python3-wheel \
+    && pip install --break-system-packages --upgrade setuptools \
+    && pip install --break-system-packages -r requirements.txt \
+    && apt autoremove -y \
+    && apt clean -y \
+    && rm -rf /tmp/* /var/tmp/* \
+    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+    && find /var/cache -type f -delete
+
+
+ENTRYPOINT ["/app/tools.sh"]
+
+### Light, CLI only
+FROM base AS light
+
+COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
+
+WORKDIR /app
+
+ENTRYPOINT [ "/app/llama-cli" ]
+
+### Server, Server only
+FROM base AS server
+
+ENV LLAMA_ARG_HOST=0.0.0.0
+
+COPY --from=build /app/full/llama-server /app
+
+WORKDIR /app
+
+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ENTRYPOINT [ "/app/llama-server" ]
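The server stage above already sets LLAMA_ARG_HOST=0.0.0.0 and health-checks port 8080, so running it mainly needs GPU access and a port mapping. A sketch under those assumptions (the image tag and model path are illustrative, not from the commit):

docker build -f .devops/cuda-new.Dockerfile --target server -t llama-server-cuda .
docker run --rm --gpus all -p 8080:8080 -v "$PWD/models:/models" \
    llama-server-cuda -m /models/model.gguf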

.devops/cuda.Dockerfile

Lines changed: 2 additions & 2 deletions
@@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
 ARG CUDA_DOCKER_ARCH=default
 
 RUN apt-get update && \
-    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
+    apt-get install -y build-essential cmake python3 python3-pip git libssl-dev libgomp1
 
 WORKDIR /app
 
@@ -74,7 +74,7 @@ ENTRYPOINT ["/app/tools.sh"]
 ### Light, CLI only
 FROM base AS light
 
-COPY --from=build /app/full/llama-cli /app
+COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
 
 WORKDIR /app
 

.devops/intel.Dockerfile

Lines changed: 2 additions & 2 deletions
@@ -6,7 +6,7 @@ FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS build
 
 ARG GGML_SYCL_F16=OFF
 RUN apt-get update && \
-    apt-get install -y git libcurl4-openssl-dev
+    apt-get install -y git libssl-dev
 
 WORKDIR /app
 
@@ -73,7 +73,7 @@ ENTRYPOINT ["/app/tools.sh"]
 FROM base AS light
 
 COPY --from=build /app/lib/ /app
-COPY --from=build /app/full/llama-cli /app
+COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
 
 WORKDIR /app
 

.devops/llama-cli-cann.Dockerfile

Lines changed: 4 additions & 3 deletions
@@ -6,7 +6,7 @@ WORKDIR /app
 
 COPY . .
 
-RUN yum install -y gcc g++ cmake make libcurl-devel
+RUN yum install -y gcc g++ cmake make openssl-devel
 ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
 ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
 ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
@@ -23,11 +23,12 @@ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
 RUN echo "Building with static libs" && \
     source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
     cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_TESTS=OFF && \
-    cmake --build build --config Release --target llama-cli
+    cmake --build build --config Release --target llama-cli && \
+    cmake --build build --config Release --target llama-completion
 
 # TODO: use image with NNRT
 FROM ascendai/cann:$ASCEND_VERSION AS runtime
-COPY --from=build /app/build/bin/llama-cli /llama-cli
+COPY --from=build /app/build/bin/llama-cli /app/build/bin/llama-completion /
 
 ENV LC_ALL=C.utf8
 
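Note that both binaries are copied to the image root here, so they run as /llama-cli and /llama-completion rather than from /app. A hypothetical build invocation (the ASCEND_VERSION tag is a placeholder, not a value from this diff):

docker build -f .devops/llama-cli-cann.Dockerfile \
    --build-arg ASCEND_VERSION=<ascendai/cann tag> \
    -t llama-cli-cann .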

.devops/llama-cpp-cuda.srpm.spec

Lines changed: 2 additions & 0 deletions
@@ -37,6 +37,7 @@ make -j GGML_CUDA=1
 %install
 mkdir -p %{buildroot}%{_bindir}/
 cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
+cp -p llama-completion %{buildroot}%{_bindir}/llama-cuda-completion
 cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
 cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
 
@@ -68,6 +69,7 @@ rm -rf %{_builddir}/*
 
 %files
 %{_bindir}/llama-cuda-cli
+%{_bindir}/llama-cuda-completion
 %{_bindir}/llama-cuda-server
 %{_bindir}/llama-cuda-simple
 /usr/lib/systemd/system/llamacuda.service

.devops/llama-cpp.srpm.spec

Lines changed: 2 additions & 0 deletions
@@ -39,6 +39,7 @@ make -j
 %install
 mkdir -p %{buildroot}%{_bindir}/
 cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
+cp -p llama-completion %{buildroot}%{_bindir}/llama-completion
 cp -p llama-server %{buildroot}%{_bindir}/llama-server
 cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
 
@@ -70,6 +71,7 @@ rm -rf %{_builddir}/*
 
 %files
 %{_bindir}/llama-cli
+%{_bindir}/llama-completion
 %{_bindir}/llama-server
 %{_bindir}/llama-simple
 /usr/lib/systemd/system/llama.service
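Both spec files now install and package the new binary. For reference, a hedged local-build sketch using standard rpmbuild conventions (assuming sources are staged under ~/rpmbuild/SOURCES as the spec expects):

rpmbuild -ba .devops/llama-cpp.srpm.spec
rpm -qlp ~/rpmbuild/RPMS/$(uname -m)/llama*.rpm | grep llama-completion
# expected: %{_bindir}/llama-completion, i.e. /usr/bin/llama-completion on most distros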

.devops/musa.Dockerfile

Lines changed: 2 additions & 2 deletions
@@ -18,7 +18,7 @@ RUN apt-get update && \
     python3 \
     python3-pip \
     git \
-    libcurl4-openssl-dev \
+    libssl-dev \
     libgomp1
 
 WORKDIR /app
@@ -81,7 +81,7 @@ ENTRYPOINT ["/app/tools.sh"]
 ### Light, CLI only
 FROM base AS light
 
-COPY --from=build /app/full/llama-cli /app
+COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
 
 WORKDIR /app

.devops/nix/package.nix

Lines changed: 1 addition & 4 deletions
@@ -32,7 +32,6 @@
   useMpi ? false,
   useRocm ? config.rocmSupport,
   rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
-  enableCurl ? true,
   useVulkan ? false,
   useRpc ? false,
   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
@@ -160,15 +159,13 @@ effectiveStdenv.mkDerivation (finalAttrs: {
     ++ optionals useMpi [ mpi ]
     ++ optionals useRocm rocmBuildInputs
     ++ optionals useBlas [ blas ]
-    ++ optionals useVulkan vulkanBuildInputs
-    ++ optionals enableCurl [ curl ];
+    ++ optionals useVulkan vulkanBuildInputs;
 
   cmakeFlags =
     [
       (cmakeBool "LLAMA_BUILD_SERVER" true)
       (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
       (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
-      (cmakeBool "LLAMA_CURL" enableCurl)
      (cmakeBool "GGML_NATIVE" false)
      (cmakeBool "GGML_BLAS" useBlas)
      (cmakeBool "GGML_CUDA" useCuda)
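One consequence of dropping enableCurl: any downstream Nix expression that passed it must remove the argument, since callPackage rejects unexpected arguments. A minimal sketch, assuming the package is wired through callPackage as in the repo's flake (the attribute name is hypothetical):

llama-cpp-vulkan = callPackage ./.devops/nix/package.nix {
  # enableCurl = false;  # would now fail as an unexpected argument
  useVulkan = true;
};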
