Skip to content

Commit f4f207a

Browse files
committed
add a cli flag to avoid rate limit when unauthenticated
1 parent 0f863fb commit f4f207a

File tree

3 files changed

+33
-10
lines changed

3 files changed

+33
-10
lines changed

nf_core/__main__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,13 @@ def command_pipelines_lint(
421421
default=4,
422422
help="Number of parallel image downloads",
423423
)
424+
@click.option(
425+
"-a",
426+
"--api_download",
427+
is_flag=True,
428+
default=False,
429+
help="Download repos via GitHub API (allows for authentication).",
430+
)
424431
@click.pass_context
425432
def command_pipelines_download(
426433
ctx,
@@ -437,6 +444,7 @@ def command_pipelines_download(
437444
container_cache_utilisation,
438445
container_cache_index,
439446
parallel_downloads,
447+
api_download,
440448
):
441449
"""
442450
Download a pipeline, nf-core/configs and pipeline singularity images.
@@ -456,6 +464,7 @@ def command_pipelines_download(
456464
container_cache_utilisation,
457465
container_cache_index,
458466
parallel_downloads,
467+
api_download,
459468
)
460469

461470

nf_core/commands_pipelines.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ def pipelines_download(
176176
container_cache_utilisation,
177177
container_cache_index,
178178
parallel_downloads,
179+
api_download,
179180
):
180181
"""
181182
Download a pipeline, nf-core/configs and pipeline singularity images.
@@ -199,6 +200,7 @@ def pipelines_download(
199200
container_cache_utilisation,
200201
container_cache_index,
201202
parallel_downloads,
203+
api_download,
202204
)
203205
dl.download_workflow()
204206

nf_core/pipelines/download.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ class DownloadWorkflow:
103103
container_cache_utilisation (str): If a local or remote cache of already existing container images should be considered. Defaults to None.
104104
container_cache_index (str): An index for the remote container cache. Defaults to None.
105105
parallel_downloads (int): The number of parallel downloads to use. Defaults to 4.
106+
api_download (bool): If True, download the workflow files via the GitHub API instead of the plain archive URL. This allows authenticated requests, e.g., via GITHUB_TOKEN. Defaults to False.
106107
"""
107108

108109
def __init__(
@@ -120,6 +121,7 @@ def __init__(
120121
container_cache_utilisation=None,
121122
container_cache_index=None,
122123
parallel_downloads=4,
124+
api_download=False,
123125
):
124126
self.pipeline = pipeline
125127
if isinstance(revision, str):
@@ -161,6 +163,8 @@ def __init__(
161163
self.container_cache_index = container_cache_index
162164
# allows to specify a container library / registry or a respective mirror to download images from
163165
self.parallel_downloads = parallel_downloads
166+
# if api_download is True, we will use the GitHub API to download the workflow files.
167+
self.api_download = api_download
164168

165169
self.wf_revisions = []
166170
self.wf_branches: Dict[str, Any] = {}
@@ -269,8 +273,8 @@ def download_workflow_static(self):
269273
# Download the pipeline files for each selected revision
270274
log.info("Downloading workflow files from GitHub")
271275

272-
for item in zip(self.revision, self.wf_sha.values()):
273-
revision_dirname = self.download_wf_files(revision=item[0], wf_sha=item[1])
276+
for item in zip(self.revision, self.wf_sha.values(), self.wf_download_url.values()):
277+
revision_dirname = self.download_wf_files(revision=item[0], wf_sha=item[1], download_url=item[2])
274278

275279
if self.include_configs:
276280
try:
@@ -627,18 +631,26 @@ def prompt_compression_type(self):
627631
if self.compress_type == "none":
628632
self.compress_type = None
629633

630-
def download_wf_files(self, revision, wf_sha):
634+
def download_wf_files(self, revision, wf_sha, download_url):
631635
"""Downloads workflow files from GitHub to the :attr:`self.outdir`."""
632636

633-
api_url = f"https://api.github.com/repos/{self.pipeline}/commits/{wf_sha}"
634-
log.debug(f"Downloading from API {api_url}")
637+
if not self.api_download:
638+
log.debug(f"Downloading {download_url}")
639+
# Download GitHub zip file into memory and extract
640+
url = requests.get(download_url)
641+
with ZipFile(io.BytesIO(url.content)) as zipfile:
642+
zipfile.extractall(self.outdir)
643+
topdir = f"{self.pipeline}-{wf_sha if bool(wf_sha) else ''}".split("/")[-1]
644+
else:
645+
api_url = f"https://api.github.com/repos/{self.pipeline}/zipball/{wf_sha}"
646+
log.debug(f"Downloading from API {api_url}")
635647

636-
# Download GitHub zip file into memory and extract
637-
content = gh_api.get(api_url).content
648+
# Download GitHub zip file into memory and extract
649+
content = gh_api.get(api_url).content
638650

639-
with ZipFile(io.BytesIO(content)) as zipfile:
640-
topdir = zipfile.namelist()[0]
641-
zipfile.extractall(self.outdir)
651+
with ZipFile(io.BytesIO(content)) as zipfile:
652+
topdir = zipfile.namelist()[0]
653+
zipfile.extractall(self.outdir)
642654

643655
# create a filesystem-safe version of the revision name for the directory
644656
revision_dirname = re.sub("[^0-9a-zA-Z]+", "_", revision)

0 commit comments

Comments
 (0)