-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathDockerfile
More file actions
98 lines (87 loc) · 2.84 KB
/
Dockerfile
File metadata and controls
98 lines (87 loc) · 2.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Build arguments selecting the base image tag; override with --build-arg.
ARG PYTHON_VERSION="3.11"
ARG DISTRO="bookworm"
FROM python:${PYTHON_VERSION}-slim-${DISTRO} AS base
# An ARG declared before FROM is only visible to FROM lines. Redeclaring it
# without a value pulls the global value (default or --build-arg) into this
# stage, so the default is defined in exactly one place and cannot drift
# between the base image tag and the backports suite used below.
ARG DISTRO
# Setup env: UTF-8 locale for the build steps and the resulting image.
ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8
# start pyarrow build
# Apache Arrow release tag to download and compile.
ARG ARROW_VERSION=23.0.0
# Install the toolchain and system libraries for the build steps that follow.
# The ${DISTRO}-backports suite is added as an apt source so that cmake can be
# installed from it (presumably for a newer CMake than the stable suite ships).
# Packages are listed one per line, alphabetically, each followed by " \" so
# that adjacent names can never be glued together by the line continuation.
# The apt lists are removed in the same layer to keep the layer small.
RUN echo "deb http://deb.debian.org/debian ${DISTRO}-backports main" >> /etc/apt/sources.list \
    && apt-get update \
    && apt-get -y install \
        autoconf \
        bison \
        ca-certificates \
        curl \
        flex \
        g++ \
        gcc \
        libboost-dev \
        libboost-filesystem-dev \
        libboost-regex-dev \
        libboost-system-dev \
        libgflags-dev \
        libgfortran5 \
        libjemalloc-dev \
        liblz4-dev \
        libopenblas-dev \
        libsnappy-dev \
        libssl-dev \
        libutf8proc-dev \
        libzstd-dev \
        make \
        ninja-build \
        pkg-config \
        rapidjson-dev \
        sudo \
        tzdata \
    # cmake from debian-backports
    && apt-get -t ${DISTRO}-backports install -y \
        cmake \
    && rm -rf /var/lib/apt/lists/*
# Configure pip to also search piwheels, then upgrade pip and install the
# Python packages used by the later build steps.
# printf is used instead of echo because echo's handling of "\n" differs
# between shells; printf always expands it, so pip.conf gets two real lines.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN printf '[global]\nextra-index-url=https://www.piwheels.org/simple\n' > /etc/pip.conf \
    && python -m pip install --no-cache-dir -U pip \
    && python -m pip install --no-cache-dir wheel setuptools numpy pandas psutil cython \
    && which python
WORKDIR /build/arrow
# Fetch the Arrow source release selected by ARROW_VERSION and unpack it into
# the working directory, dropping the archive's top-level directory.
# The tarball is written to a temporary file rather than piped into tar: the
# default /bin/sh has no pipefail, so a pipeline would report only tar's exit
# status. With "&&" a failed download (--fail) stops the build immediately.
# The temporary file is removed in the same layer.
RUN curl --silent --show-error --fail --location \
        --output /tmp/apache-arrow.tar.gz \
        "https://github.com/apache/arrow/archive/apache-arrow-${ARROW_VERSION}.tar.gz" \
    && tar --strip-components=1 -xzf /tmp/apache-arrow.tar.gz \
    && rm /tmp/apache-arrow.tar.gz
# ARROW_HOME is the install prefix passed to CMake below;
# CMAKE_BUILD_PARALLEL_LEVEL is reused as the job count for make.
ENV ARROW_HOME=/dist \
    CMAKE_BUILD_PARALLEL_LEVEL=3
# Build directory; CMake is pointed at its parent directory (cpp/).
WORKDIR /build/arrow/cpp/release
# Configure, compile and install Arrow C++ into ARROW_HOME.
# Flags are grouped as: install layout / interpreter, Arrow components,
# compression codecs.
# NOTE(review): ARROW_PLASMA appears to have been removed from upstream Arrow
# in recent releases; presumably CMake ignores it for the default
# ARROW_VERSION - confirm before relying on it.
RUN cmake \
        -DCMAKE_INSTALL_PREFIX="${ARROW_HOME}" \
        -DCMAKE_INSTALL_LIBDIR=lib \
        -DPYTHON_EXECUTABLE=/usr/local/bin/python \
        -DARROW_BUILD_TESTS=OFF \
        -DARROW_DATASET=ON \
        -DARROW_PARQUET=ON \
        -DARROW_PLASMA=ON \
        -DARROW_PYTHON=ON \
        -DARROW_WITH_BZ2=ON \
        -DARROW_WITH_LZ4=ON \
        -DARROW_WITH_SNAPPY=ON \
        -DARROW_WITH_ZLIB=ON \
        -DARROW_WITH_ZSTD=ON \
        .. \
    && make -j "${CMAKE_BUILD_PARALLEL_LEVEL}" \
    && make install
# Environment consumed by the pyarrow build that follows.
# NOTE(review): the PLASMA entries presumably have no effect with Arrow
# releases that no longer ship Plasma - confirm against the pinned version.
ENV PYARROW_CMAKE_GENERATOR=Ninja \
    PYARROW_CMAKE_OPTIONS="-DARROW_USE_LD_GOLD=ON" \
    PYARROW_WITH_PLASMA=1 \
    PYARROW_WITH_PARQUET=1 \
    PYARROW_BUNDLE_ARROW_CPP=1
# Arrow C++ is installed with CMAKE_INSTALL_LIBDIR=lib, so its shared
# libraries are under $ARROW_HOME/lib; the loader path must name that
# directory, not the install prefix itself.
ENV LD_LIBRARY_PATH=$ARROW_HOME/lib
# Let CMake search both the library directory and the install prefix.
ENV CMAKE_PREFIX_PATH=$ARROW_HOME/lib:$ARROW_HOME
ENV Arrow_DIR=$ARROW_HOME \
    Parquet_DIR=$ARROW_HOME \
    Plasma_DIR=$ARROW_HOME
WORKDIR /build/arrow/python
# Install the wheel-build requirements, expose NumPy's header directory to the
# build through Python3_NumPy_INCLUDE_DIRS, build the extension modules in
# release mode with the Arrow C++ libraries bundled, and package a wheel.
# The command substitution is quoted so the exported path is taken verbatim,
# and --no-cache-dir keeps pip's download cache out of the layer.
# The final ls lists the produced wheel(s) in the build log.
RUN pip install --no-cache-dir -r requirements-wheel-build.txt cython \
    && export Python3_NumPy_INCLUDE_DIRS="$(python -c 'import numpy; print(numpy.get_include())')" \
    && python setup.py build_ext --build-type="release" --bundle-arrow-cpp bdist_wheel \
    && ls -l /build/arrow/python/dist
# COPY --from=pyarrow-deps /build/arrow/python/dist/pyarrow-*.whl .