Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow nf-core download -r to download commits #2906

Draft
wants to merge 3 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions nf_core/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ def __init__(

self.wf_revisions = {}
self.wf_branches = {}
self.wf_commits = {}
self.wf_sha = {}
self.wf_download_url = {}
self.nf_config = {}
Expand All @@ -160,8 +161,8 @@ def download_workflow(self):
# Get workflow details
try:
self.prompt_pipeline_name()
self.pipeline, self.wf_revisions, self.wf_branches = nf_core.utils.get_repo_releases_branches(
self.pipeline, self.wfs
self.pipeline, self.wf_revisions, self.wf_branches, self.wf_commits = (
nf_core.utils.get_repo_releases_branches_commits(self.pipeline, self.wfs)
)
self.prompt_revision()
self.get_revision_hash()
Expand Down Expand Up @@ -354,13 +355,18 @@ def prompt_revision(self):
raise AssertionError(f"No revisions of {self.pipeline} available for download.")

def get_revision_hash(self):
"""Find specified revision / branch hash"""
"""Find specified revision / branch / commit hash"""

for revision in self.revision: # revision is a list of strings, but may be of length 1
# Branch
if revision in self.wf_branches.keys():
self.wf_sha = {**self.wf_sha, revision: self.wf_branches[revision]}

# Commit - full or short hash
elif revision in self.wf_commits or any(full_hash[:7] == revision for full_hash in self.wf_commits):
for full_hash in self.wf_commits:
if full_hash[:7] == revision or full_hash == revision:
self.wf_sha = {**self.wf_sha, revision: full_hash}
break
# Revision
else:
for r in self.wf_revisions:
Expand Down Expand Up @@ -616,6 +622,7 @@ def download_wf_files(self, revision, wf_sha, download_url):

# Rename the internal directory name to be more friendly
gh_name = f"{self.pipeline}-{wf_sha if bool(wf_sha) else ''}".split("/")[-1]

os.rename(
os.path.join(self.outdir, gh_name),
os.path.join(self.outdir, revision_dirname),
Expand Down
2 changes: 1 addition & 1 deletion nf_core/launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def get_pipeline_schema(self):

if not self.pipeline_revision:
try:
self.pipeline, wf_releases, wf_branches = nf_core.utils.get_repo_releases_branches(
self.pipeline, wf_releases, wf_branches, _ = nf_core.utils.get_repo_releases_branches_commits(
self.pipeline, self.wfs
)
except AssertionError as e:
Expand Down
9 changes: 7 additions & 2 deletions nf_core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -940,7 +940,7 @@ def validate(self, value):
return True


def get_repo_releases_branches(pipeline, wfs):
def get_repo_releases_branches_commits(pipeline, wfs):
"""Fetches details of a nf-core workflow to download.

Args:
Expand All @@ -956,6 +956,7 @@ def get_repo_releases_branches(pipeline, wfs):

wf_releases = []
wf_branches = {}
wf_commits = []

# Repo is a nf-core pipeline
for wf in wfs.remote_workflows:
Expand Down Expand Up @@ -1009,8 +1010,12 @@ def get_repo_releases_branches(pipeline, wfs):
):
wf_branches[branch["name"]] = branch["commit"]["sha"]

# Get commit information from github api
commit_response = gh_api.safe_get(f"https://api.github.com/repos/{pipeline}/commits?sha={branch['name']}")
for commit in commit_response.json():
wf_commits.append(commit["sha"])
Comment on lines +1013 to +1016
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am a bit scared of this - is it really getting every single commit from a repo? In some repos that could be a huge response.

I wonder if we move this into a separate function call and only do it if we need to - that is, once we're sure that the user is passing a commit and is not going to use a branch or a tag?

I suspect that the number of people wanting exact commits to be a tiny proportion, so I don't want to slow down / risk crashes for the majority.

# Return pipeline again in case we added the nf-core/ prefix
return pipeline, wf_releases, wf_branches
return pipeline, wf_releases, wf_branches, wf_commits


CONFIG_PATHS = [".nf-core.yml", ".nf-core.yaml"]
Expand Down
77 changes: 56 additions & 21 deletions tests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,9 @@ def test_get_release_hash_release(self):
wfs.get_remote_workflows()
pipeline = "methylseq"
download_obj = DownloadWorkflow(pipeline=pipeline, revision="1.6")
(
download_obj.pipeline,
download_obj.wf_revisions,
download_obj.wf_branches,
) = nf_core.utils.get_repo_releases_branches(pipeline, wfs)
(download_obj.pipeline, download_obj.wf_revisions, download_obj.wf_branches, _) = (
nf_core.utils.get_repo_releases_branches_commits(pipeline, wfs)
)
download_obj.get_revision_hash()
assert download_obj.wf_sha[download_obj.revision[0]] == "b3e5e3b95aaf01d98391a62a10a3990c0a4de395"
assert download_obj.outdir == "nf-core-methylseq_1.6"
Expand All @@ -47,29 +45,69 @@ def test_get_release_hash_branch(self):
# Exoseq pipeline is archived, so `dev` branch should be stable
pipeline = "exoseq"
download_obj = DownloadWorkflow(pipeline=pipeline, revision="dev")
(download_obj.pipeline, download_obj.wf_revisions, download_obj.wf_branches, _) = (
nf_core.utils.get_repo_releases_branches_commits(pipeline, wfs)
)
download_obj.get_revision_hash()
assert download_obj.wf_sha[download_obj.revision[0]] == "819cbac792b76cf66c840b567ed0ee9a2f620db7"
assert download_obj.outdir == "nf-core-exoseq_dev"
assert (
download_obj.wf_download_url[download_obj.revision[0]]
== "https://github.com/nf-core/exoseq/archive/819cbac792b76cf66c840b567ed0ee9a2f620db7.zip"
)

def test_get_release_hash_long_commit(self):
wfs = nf_core.list.Workflows()
wfs.get_remote_workflows()
# Exoseq pipeline is archived, so `dev` branch should be stable
pipeline = "exoseq"

download_obj = DownloadWorkflow(pipeline="exoseq", revision="819cbac792b76cf66c840b567ed0ee9a2f620db7")
(
download_obj.pipeline,
download_obj.wf_revisions,
download_obj.wf_branches,
) = nf_core.utils.get_repo_releases_branches(pipeline, wfs)
download_obj.wf_commits,
) = nf_core.utils.get_repo_releases_branches_commits(pipeline, wfs)
download_obj.get_revision_hash()
print(download_obj)
assert download_obj.wf_sha[download_obj.revision[0]] == "819cbac792b76cf66c840b567ed0ee9a2f620db7"
assert download_obj.outdir == "nf-core-exoseq_dev"
assert download_obj.outdir == "nf-core-exoseq_819cbac792b76cf66c840b567ed0ee9a2f620db7"
assert (
download_obj.wf_download_url[download_obj.revision[0]]
== "https://github.com/nf-core/exoseq/archive/819cbac792b76cf66c840b567ed0ee9a2f620db7.zip"
)

def test_get_release_hash_non_existent_release(self):
def test_get_release_hash_short_commit(self):
wfs = nf_core.list.Workflows()
wfs.get_remote_workflows()
pipeline = "methylseq"
download_obj = DownloadWorkflow(pipeline=pipeline, revision="thisisfake")
# Exoseq pipeline is archived, so `dev` branch should be stable
pipeline = "exoseq"

download_obj = DownloadWorkflow(pipeline="exoseq", revision="819cbac")
(
download_obj.pipeline,
download_obj.wf_revisions,
download_obj.wf_branches,
) = nf_core.utils.get_repo_releases_branches(pipeline, wfs)
download_obj.wf_commits,
) = nf_core.utils.get_repo_releases_branches_commits(pipeline, wfs)
download_obj.get_revision_hash()
print(download_obj)
assert download_obj.wf_sha[download_obj.revision[0]] == "819cbac792b76cf66c840b567ed0ee9a2f620db7"
assert download_obj.outdir == "nf-core-exoseq_819cbac"
assert (
download_obj.wf_download_url[download_obj.revision[0]]
== "https://github.com/nf-core/exoseq/archive/819cbac792b76cf66c840b567ed0ee9a2f620db7.zip"
)

def test_get_release_hash_non_existent_release(self):
wfs = nf_core.list.Workflows()
wfs.get_remote_workflows()
pipeline = "methylseq"
download_obj = DownloadWorkflow(pipeline=pipeline, revision="thisisfake")
(download_obj.pipeline, download_obj.wf_revisions, download_obj.wf_branches, _) = (
nf_core.utils.get_repo_releases_branches_commits(pipeline, wfs)
)
with pytest.raises(AssertionError):
download_obj.get_revision_hash()

Expand Down Expand Up @@ -570,30 +608,27 @@ def test_download_workflow_with_success(self, tmp_dir, mock_download_image, mock
def test_download_workflow_for_platform(self, tmp_dir, _):
download_obj = DownloadWorkflow(
pipeline="nf-core/rnaseq",
revision=("3.7", "3.9"),
revision=("3.7", "3.9", "1817d14"),
compress_type="none",
platform=True,
container_system="singularity",
)

download_obj.include_configs = False # suppress prompt, because stderr.is_interactive doesn't.

assert isinstance(download_obj.revision, list) and len(download_obj.revision) == 2
assert isinstance(download_obj.revision, list) and len(download_obj.revision) == 3
assert isinstance(download_obj.wf_sha, dict) and len(download_obj.wf_sha) == 0
assert isinstance(download_obj.wf_download_url, dict) and len(download_obj.wf_download_url) == 0

wfs = nf_core.list.Workflows()
wfs.get_remote_workflows()
(
download_obj.pipeline,
download_obj.wf_revisions,
download_obj.wf_branches,
) = nf_core.utils.get_repo_releases_branches(download_obj.pipeline, wfs)
(download_obj.pipeline, download_obj.wf_revisions, download_obj.wf_branches, download_obj.wf_commits) = (
nf_core.utils.get_repo_releases_branches_commits(download_obj.pipeline, wfs)
)

download_obj.get_revision_hash()

# download_obj.wf_download_url is not set for Seqera Platform downloads, but the sha values are
assert isinstance(download_obj.wf_sha, dict) and len(download_obj.wf_sha) == 2
# download_obj.wf_download_url is not set for tower downloads, but the sha values are
assert isinstance(download_obj.wf_sha, dict) and len(download_obj.wf_sha) == 3
assert isinstance(download_obj.wf_download_url, dict) and len(download_obj.wf_download_url) == 0

# The outdir for multiple revisions is the pipeline name and date: e.g. nf-core-rnaseq_2023-04-27_18-54
Expand Down
22 changes: 14 additions & 8 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,39 +160,45 @@ def test_pip_erroneous_package(self):
with pytest.raises(ValueError):
nf_core.utils.pip_package("not_a_package=1.0")

def test_get_repo_releases_branches_nf_core(self):
def test_get_repo_releases_branches_commits_nf_core(self):
wfs = nf_core.list.Workflows()
wfs.get_remote_workflows()
pipeline, wf_releases, wf_branches = nf_core.utils.get_repo_releases_branches("methylseq", wfs)
pipeline, wf_releases, wf_branches, wf_commits = nf_core.utils.get_repo_releases_branches_commits(
"methylseq", wfs
)
for r in wf_releases:
if r.get("tag_name") == "1.6":
break
else:
raise AssertionError("Release 1.6 not found")
assert "dev" in wf_branches.keys()
assert "54f823e102ef3d04077cc091a5ae435519f9923a" in wf_commits

def test_get_repo_releases_branches_not_nf_core(self):
def test_get_repo_releases_branches_commits_not_nf_core(self):
wfs = nf_core.list.Workflows()
wfs.get_remote_workflows()
pipeline, wf_releases, wf_branches = nf_core.utils.get_repo_releases_branches("MultiQC/MultiQC", wfs)
pipeline, wf_releases, wf_branches, wf_commits = nf_core.utils.get_repo_releases_branches_commits(
"MultiQC/MultiQC", wfs
)
for r in wf_releases:
if r.get("tag_name") == "v1.10":
break
else:
raise AssertionError("MultiQC release v1.10 not found")
assert "main" in wf_branches.keys()
assert "6764be5a6874f6601765e70316b404c01f6407fc" in wf_commits

def test_get_repo_releases_branches_not_exists(self):
def test_get_repo_releases_branches_commits_not_exists(self):
wfs = nf_core.list.Workflows()
wfs.get_remote_workflows()
with pytest.raises(AssertionError):
nf_core.utils.get_repo_releases_branches("made_up_pipeline", wfs)
nf_core.utils.get_repo_releases_branches_commits("made_up_pipeline", wfs)

def test_get_repo_releases_branches_not_exists_slash(self):
def test_get_repo_releases_branches_commits_not_exists_slash(self):
wfs = nf_core.list.Workflows()
wfs.get_remote_workflows()
with pytest.raises(AssertionError):
nf_core.utils.get_repo_releases_branches("made-up/pipeline", wfs)
nf_core.utils.get_repo_releases_branches_commits("made-up/pipeline", wfs)


def test_validate_file_md5():
Expand Down
Loading