Skip to content

Mirror NCBI Datasets to Object Storage #3197

Mirror NCBI Datasets to Object Storage

Mirror NCBI Datasets to Object Storage #3197

# Workflow header: display name and the events that start a run.
name: Mirror NCBI Datasets to Object Storage
on:
  # Allows the workflow to be started manually.
  workflow_dispatch:
  schedule:
    - cron: '12 */2 * * *'  # Runs every two hours at 12 minutes past the hour
jobs:
  # For each taxon ID in the matrix: download the NCBI virus genome dataset,
  # repack its contents as a zstd-compressed tarball, and upload both the
  # original zip and the tarball to the object storage bucket.
  download-and-upload:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        taxon_id:
          - 10244  # Mpox
          - 11053  # Dengue type 1
          - 11060  # Dengue type 2
          - 11069  # Dengue type 3
          - 11070  # Dengue type 4
          - 11137  # Coronavirus 229E
          - 11234  # Measles morbillivirus
          - 11250  # RSV
          - 11520  # Influenza B
          - 11676  # HIV1
          - 12059  # Enterovirus
          - 12637  # Dengue (all)
          - 31631  # Coronavirus OC43
          - 31704  # Coxsackievirus A16
          - 39054  # Enterovirus A71
          - 42769  # Coxsackievirus A10
          - 42789  # Enterovirus D68
          - 86107  # Coxsackievirus A6
          - 118655  # Oropouche
          - 138948  # Enterovirus A
          - 138949  # Enterovirus B
          - 138950  # Enterovirus C
          - 138951  # Enterovirus D
          - 162145  # Human metapneumovirus
          - 197911  # Influenza A
          - 208893  # RSV A
          - 208895  # RSV B
          - 277944  # Coronavirus NL63
          - 290028  # Coronavirus HKU1
          - 1868215  # Orthopneumovirus
          - 3048148  # Metapneumovirus hominis
          - 3048448  # West Nile virus
          - 3050294  # Chickenpox virus
          - 3052460  # Orthoebolavirus sudanense
          - 3052462  # Orthoebolavirus zairense
          - 3052505  # Orthomarburgvirus marburgense
          - 3052518  # CCHFV
      # Keep mirroring the remaining taxa even when one of them fails.
      fail-fast: false
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
      # Provides the `datasets` and `s3cmd` commands used by the steps below.
      - uses: mamba-org/setup-micromamba@v2
        with:
          environment-name: datasets
          create-args: ncbi-datasets-cli s3cmd
      - name: Download NCBI Dataset
        # Login shell (-l) so the micromamba environment is on PATH;
        # -e aborts the script on the first failing command.
        shell: bash -el {0}
        run: |
          datasets download virus genome taxon ${{ matrix.taxon_id }} --no-progressbar --filename ${{ matrix.taxon_id }}.zip
          unzip -o ${{ matrix.taxon_id }}.zip
          tar -I 'zstd -T0 -18' -cvf ${{ matrix.taxon_id }}.tar.zst ncbi_dataset
      # NOTE(review): `verbosity = DEBUG` makes s3cmd log request details on
      # every upload; confirm this level of logging is still wanted.
      - name: Create S3cmd config
        # Secrets are passed through the environment instead of being spliced
        # into the script text, so characters such as `$` or backticks in a
        # secret are written to the config file verbatim.
        env:
          S3_ACCESS_KEY: ${{ secrets.HETZNER_S3_ACCESS_KEY }}
          S3_SECRET_KEY: ${{ secrets.HETZNER_S3_SECRET_KEY }}
        run: |
          cat <<EOF > ~/.s3cfg
          [default]
          access_key = ${S3_ACCESS_KEY}
          secret_key = ${S3_SECRET_KEY}
          host_base = hel1.your-objectstorage.com
          host_bucket = %(bucket)s.hel1.your-objectstorage.com
          verbosity = DEBUG
          EOF
      - name: Upload to Object Storage
        # -e is required here: without it the loop's exit status is that of
        # the last upload only, so a failed .zip upload would go unnoticed.
        shell: bash -el {0}
        run: |
          for file in ${{ matrix.taxon_id }}.zip ${{ matrix.taxon_id }}.tar.zst; do
            s3cmd put "${file}" s3://loculus-public/mirror/"${file}"
          done
  # Runs only if the 'download-and-upload' job (any of its matrix entries) fails.
  notify_on_failure:
    needs: download-and-upload
    runs-on: ubuntu-latest
    if: failure()
    steps:
      - name: Send Slack Notification
        env:
          SLACK_HOOK: ${{ secrets.SLACK_HOOK }}
        # --fail makes curl exit non-zero on an HTTP error response, so a
        # rejected webhook call is visible instead of passing silently.
        run: |
          curl --fail -X POST -H 'Content-type: application/json' --data '{
            "text": "🚨 Failed to mirror NCBI datasets: ${{ github.repository }}\n🔗 <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Details>"
          }' "$SLACK_HOOK"