From 97567e56838c04acfbb9dee539482b25f9d662d7 Mon Sep 17 00:00:00 2001 From: Pratik Raj Date: Sat, 12 Oct 2024 00:49:23 +0530 Subject: [PATCH] feat: use --no-cache-dir flag with pip in dockerfiles to save space Using the "--no-cache-dir" flag with pip install ensures that packages downloaded by pip are not cached on the system. This is a best practice that makes sure packages are fetched from the repository instead of from a local cache. Further, in the case of Docker containers, disabling the cache reduces the image size. In terms of numbers, the saving depends on the number of Python packages multiplied by their respective sizes; e.g., for heavy packages with a lot of dependencies, not caching pip packages saves a lot of space. More detailed information can be found at https://medium.com/sciforce/strategies-of-docker-images-optimization-2ca9cc5719b6 Signed-off-by: Pratik Raj --- dockerfiles/cuda_torch03 | 30 +++++++++++++++--------------- dockerfiles/cuda_torch04 | 32 ++++++++++++++++---------------- dockerfiles/torch03 | 30 +++++++++++++++--------------- dockerfiles/torch04 | 32 ++++++++++++++++---------------- 4 files changed, 62 insertions(+), 62 deletions(-) diff --git a/dockerfiles/cuda_torch03 b/dockerfiles/cuda_torch03 index 05a5353..b004e7b 100644 --- a/dockerfiles/cuda_torch03 +++ b/dockerfiles/cuda_torch03 @@ -30,24 +30,24 @@ ENV LC_ALL en_US.UTF-8 RUN conda install -c pytorch pytorch=0.3 cuda90 # Revtok -RUN pip install -e git+https://github.com/jekbradbury/revtok.git#egg=revtok +RUN pip install --no-cache-dir -e git+https://github.com/jekbradbury/revtok.git#egg=revtok # torchtext requirements -RUN pip install tqdm -RUN pip install nltk==3.2.5 +RUN pip install --no-cache-dir tqdm +RUN pip install --no-cache-dir nltk==3.2.5 # tensorboard -RUN pip install tensorboardX -RUN pip install tensorboard -RUN pip install tensorflow -RUN pip install python-dateutil +RUN pip install --no-cache-dir tensorboardX +RUN pip install --no-cache-dir tensorboard +RUN pip install 
--no-cache-dir tensorflow +RUN pip install --no-cache-dir python-dateutil # additional python packages -RUN pip install ujson -RUN pip install -e git+git://github.com/andersjo/pyrouge.git#egg=pyrouge +RUN pip install --no-cache-dir ujson +RUN pip install --no-cache-dir -e git+git://github.com/andersjo/pyrouge.git#egg=pyrouge RUN cd /src/pyrouge/pyrouge/../tools/ROUGE-1.5.5/data/ && rm WordNet-2.0.exc.db && ./WordNet-2.0-Exceptions/buildExeptionDB.pl ./WordNet-2.0-Exceptions ./smart_common_words.txt ./WordNet-2.0.exc.db && chmod 777 WordNet-2.0.exc.db -#RUN pip install lxml -RUN pip install sacrebleu +#RUN pip install --no-cache-dir lxml +RUN pip install --no-cache-dir sacrebleu # Install packages for XML processing RUN apt-get install --yes \ @@ -61,10 +61,10 @@ RUN apt-get install --yes \ python-lxml # WikISQL evaluation -RUN pip install records -RUN pip install babel -RUN pip install tabulate +RUN pip install --no-cache-dir records +RUN pip install --no-cache-dir babel +RUN pip install --no-cache-dir tabulate -RUN pip install -e git+git://github.com/salesforce/cove.git#egg=cove +RUN pip install --no-cache-dir -e git+git://github.com/salesforce/cove.git#egg=cove CMD bash diff --git a/dockerfiles/cuda_torch04 b/dockerfiles/cuda_torch04 index c158c9f..49ef970 100644 --- a/dockerfiles/cuda_torch04 +++ b/dockerfiles/cuda_torch04 @@ -30,24 +30,24 @@ ENV LC_ALL en_US.UTF-8 RUN conda install -c pytorch pytorch=0.4.1 cuda90 # Revtok -RUN pip install -e git+https://github.com/jekbradbury/revtok.git#egg=revtok +RUN pip install --no-cache-dir -e git+https://github.com/jekbradbury/revtok.git#egg=revtok # torchtext requirements -RUN pip install tqdm -RUN pip install nltk==3.2.5 +RUN pip install --no-cache-dir tqdm +RUN pip install --no-cache-dir nltk==3.2.5 # tensorboard -RUN pip install tensorboardX -RUN pip install tensorboard -RUN pip install tensorflow -RUN pip install python-dateutil +RUN pip install --no-cache-dir tensorboardX +RUN pip install --no-cache-dir tensorboard 
+RUN pip install --no-cache-dir tensorflow +RUN pip install --no-cache-dir python-dateutil # additional python packages -RUN pip install ujson -RUN pip install -e git+git://github.com/andersjo/pyrouge.git#egg=pyrouge +RUN pip install --no-cache-dir ujson +RUN pip install --no-cache-dir -e git+git://github.com/andersjo/pyrouge.git#egg=pyrouge RUN cd /src/pyrouge/pyrouge/../tools/ROUGE-1.5.5/data/ && rm WordNet-2.0.exc.db && ./WordNet-2.0-Exceptions/buildExeptionDB.pl ./WordNet-2.0-Exceptions ./smart_common_words.txt ./WordNet-2.0.exc.db && chmod 777 WordNet-2.0.exc.db -#RUN pip install lxml -RUN pip install sacrebleu +#RUN pip install --no-cache-dir lxml +RUN pip install --no-cache-dir sacrebleu # Install packages for XML processing RUN apt-get install --yes \ @@ -61,11 +61,11 @@ RUN apt-get install --yes \ python-lxml # WikISQL evaluation -RUN pip install records -RUN pip install babel -RUN pip install tabulate +RUN pip install --no-cache-dir records +RUN pip install --no-cache-dir babel +RUN pip install --no-cache-dir tabulate -RUN pip install -e git+git://github.com/salesforce/cove.git#egg=cove -RUN pip install allennlp +RUN pip install --no-cache-dir -e git+git://github.com/salesforce/cove.git#egg=cove +RUN pip install --no-cache-dir allennlp CMD bash diff --git a/dockerfiles/torch03 b/dockerfiles/torch03 index a63b14c..028f394 100644 --- a/dockerfiles/torch03 +++ b/dockerfiles/torch03 @@ -30,24 +30,24 @@ ENV LC_ALL en_US.UTF-8 RUN conda install -c pytorch pytorch=0.3 # Revtok -RUN pip install -e git+https://github.com/jekbradbury/revtok.git#egg=revtok +RUN pip install --no-cache-dir -e git+https://github.com/jekbradbury/revtok.git#egg=revtok # torchtext requirements -RUN pip install tqdm -RUN pip install nltk==3.2.5 +RUN pip install --no-cache-dir tqdm +RUN pip install --no-cache-dir nltk==3.2.5 # tensorboard -RUN pip install tensorboardX -RUN pip install tensorboard -RUN pip install tensorflow -RUN pip install python-dateutil +RUN pip install --no-cache-dir 
tensorboardX +RUN pip install --no-cache-dir tensorboard +RUN pip install --no-cache-dir tensorflow +RUN pip install --no-cache-dir python-dateutil # additional python packages -RUN pip install ujson -RUN pip install -e git+git://github.com/andersjo/pyrouge.git#egg=pyrouge +RUN pip install --no-cache-dir ujson +RUN pip install --no-cache-dir -e git+git://github.com/andersjo/pyrouge.git#egg=pyrouge RUN cd /src/pyrouge/pyrouge/../tools/ROUGE-1.5.5/data/ && rm WordNet-2.0.exc.db && ./WordNet-2.0-Exceptions/buildExeptionDB.pl ./WordNet-2.0-Exceptions ./smart_common_words.txt ./WordNet-2.0.exc.db && chmod 777 WordNet-2.0.exc.db -#RUN pip install lxml -RUN pip install sacrebleu +#RUN pip install --no-cache-dir lxml +RUN pip install --no-cache-dir sacrebleu # Install packages for XML processing RUN apt-get install --yes \ @@ -61,9 +61,9 @@ RUN apt-get install --yes \ python-lxml # WikISQL evaluation -RUN pip install records -RUN pip install babel -RUN pip install tabulate -RUN pip install -e git+git://github.com/salesforce/cove.git#egg=cove +RUN pip install --no-cache-dir records +RUN pip install --no-cache-dir babel +RUN pip install --no-cache-dir tabulate +RUN pip install --no-cache-dir -e git+git://github.com/salesforce/cove.git#egg=cove CMD bash diff --git a/dockerfiles/torch04 b/dockerfiles/torch04 index f595913..8cce5f2 100644 --- a/dockerfiles/torch04 +++ b/dockerfiles/torch04 @@ -30,24 +30,24 @@ ENV LC_ALL en_US.UTF-8 RUN conda install -c pytorch pytorch=0.4.1 # Revtok -RUN pip install -e git+https://github.com/jekbradbury/revtok.git#egg=revtok +RUN pip install --no-cache-dir -e git+https://github.com/jekbradbury/revtok.git#egg=revtok # torchtext requirements -RUN pip install tqdm -RUN pip install nltk==3.2.5 +RUN pip install --no-cache-dir tqdm +RUN pip install --no-cache-dir nltk==3.2.5 # tensorboard -RUN pip install tensorboardX -RUN pip install tensorboard -RUN pip install tensorflow -RUN pip install python-dateutil +RUN pip install --no-cache-dir tensorboardX 
+RUN pip install --no-cache-dir tensorboard +RUN pip install --no-cache-dir tensorflow +RUN pip install --no-cache-dir python-dateutil # additional python packages -RUN pip install ujson -RUN pip install -e git+git://github.com/andersjo/pyrouge.git#egg=pyrouge +RUN pip install --no-cache-dir ujson +RUN pip install --no-cache-dir -e git+git://github.com/andersjo/pyrouge.git#egg=pyrouge RUN cd /src/pyrouge/pyrouge/../tools/ROUGE-1.5.5/data/ && rm WordNet-2.0.exc.db && ./WordNet-2.0-Exceptions/buildExeptionDB.pl ./WordNet-2.0-Exceptions ./smart_common_words.txt ./WordNet-2.0.exc.db && chmod 777 WordNet-2.0.exc.db -#RUN pip install lxml -RUN pip install sacrebleu +#RUN pip install --no-cache-dir lxml +RUN pip install --no-cache-dir sacrebleu # Install packages for XML processing RUN apt-get install --yes \ @@ -61,11 +61,11 @@ RUN apt-get install --yes \ python-lxml # WikISQL evaluation -RUN pip install records -RUN pip install babel -RUN pip install tabulate +RUN pip install --no-cache-dir records +RUN pip install --no-cache-dir babel +RUN pip install --no-cache-dir tabulate -RUN pip install -e git+git://github.com/salesforce/cove.git#egg=cove -RUN pip install allennlp +RUN pip install --no-cache-dir -e git+git://github.com/salesforce/cove.git#egg=cove +RUN pip install --no-cache-dir allennlp CMD bash