Skip to content

Commit

Permalink
feat(cachi2): add support for git-submodules
Browse files Browse the repository at this point in the history
for git-submodules pkg manager, OSBS must handle all the work; cachi2
doesn't manipulate git.

Submodules have to be cloned and their metadata exported into the SBOM and
request.json

Signed-off-by: Martin Basti <mbasti@redhat.com>
  • Loading branch information
MartinBasti committed Jan 21, 2025
1 parent 0f26fe2 commit a816fae
Show file tree
Hide file tree
Showing 8 changed files with 228 additions and 5 deletions.
2 changes: 2 additions & 0 deletions atomic_reactor/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@
HTTP_CLIENT_STATUS_RETRY = (408, 429, 500, 502, 503, 504)
# requests timeout in seconds
HTTP_REQUEST_TIMEOUT = 600
# git cmd timeout in seconds
GIT_CMD_TIMEOUT = 600
# max retries for git clone
GIT_MAX_RETRIES = 3
# how many seconds should wait before another try of git clone
Expand Down
12 changes: 12 additions & 0 deletions atomic_reactor/plugins/cachi2_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing import Any, Optional, List, Dict
from pathlib import Path

import git
from osbs.utils import clone_git_repo

from atomic_reactor.constants import (
Expand All @@ -28,6 +29,8 @@
from atomic_reactor.utils.cachi2 import (
remote_source_to_cachi2, clone_only, validate_paths,
normalize_gomod_pkg_manager, enforce_sandbox,
has_git_submodule_manager, update_submodules,
get_submodules_sbom, get_submodules_request_json_deps,
)


Expand Down Expand Up @@ -135,6 +138,15 @@ def process_remote_sources(self) -> List[Dict[str, Any]]:
remote_source_data["ref"]
)

if has_git_submodule_manager(remote_source_data):
update_submodules(source_path_app)
repo = git.Repo(str(source_path_app))
git_submodules = {
"sbom": get_submodules_sbom(repo),
"request_json": get_submodules_request_json_deps(repo)
}
remote_source["git_submodules"] = git_submodules

enforce_sandbox(source_path_app, remove_unsafe_symlinks=False)
validate_paths(source_path_app, remote_source_data.get("packages", {}))

Expand Down
44 changes: 42 additions & 2 deletions atomic_reactor/plugins/cachi2_postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from atomic_reactor.constants import (
CACHITO_ENV_ARG_ALIAS,
CACHITO_ENV_FILENAME,
CACHI2_BUILD_DIR,
PLUGIN_CACHI2_INIT,
PLUGIN_CACHI2_POSTPROCESS,
REMOTE_SOURCE_DIR,
Expand Down Expand Up @@ -110,13 +111,38 @@ def run(self) -> Optional[List[Dict[str, Any]]]:
return None

processed_remote_sources = self.postprocess_remote_sources()
self.postprocess_git_submodules_global_sbom()
self.inject_remote_sources(processed_remote_sources)

return [
self.remote_source_to_output(remote_source)
for remote_source in processed_remote_sources
]

def postprocess_git_submodules_global_sbom(self):
    """Merge git-submodule components into the global SBOM.

    atomic-reactor is responsible for handling git-submodules, so the
    components collected by the init plugin must be appended to the
    global SBOM produced by cachi2. Does nothing when no remote source
    uses the git-submodule package manager.
    """
    all_sboms = []
    for remote_source in self.init_plugin_data:
        git_submodules = remote_source.get('git_submodules')
        if not git_submodules:
            continue

        # BUGFIX: the init plugin stores the components under the key
        # "sbom" (singular); reading 'sboms' raised KeyError.
        all_sboms.extend(git_submodules['sbom'])

    if not all_sboms:
        # no git-submodule remote sources; leave the global SBOM untouched
        return

    # TODO: deduplicate sboms?

    global_sbom_path = self.workflow.build_dir.path/CACHI2_BUILD_DIR/"bom.json"
    with open(global_sbom_path, 'r') as global_sbom_f:
        global_sbom_data = json.load(global_sbom_f)
    global_sbom_data['components'].extend(all_sboms)

    with open(global_sbom_path, 'w') as global_sbom_f:
        json.dump(global_sbom_data, global_sbom_f)
        global_sbom_f.flush()

def postprocess_remote_sources(self) -> List[Cachi2RemoteSource]:
"""Process remote source requests and return information about the processed sources."""

Expand All @@ -132,12 +158,26 @@ def postprocess_remote_sources(self) -> List[Cachi2RemoteSource]:
with open(sbom_path, 'r') as sbom_f:
sbom_data = json.load(sbom_f)

# request_json must be generated before modifications to sboms are done
request_json = generate_request_json(
remote_source['remote_source'], sbom_data, json_env_data)

# update metadata with submodules info
git_submodules = remote_source.get('git_submodules')
if git_submodules:
sbom_data['components'].extend(git_submodules['sboms'])

with open(sbom_path, 'w') as sbom_f:
json.dump(sbom_data, sbom_f)
sbom_f.flush()

request_json['dependencies'].extend(git_submodules['request_json'])

remote_source_obj = Cachi2RemoteSource(
name=remote_source['name'],
tarball_path=Path(remote_source['source_path'], 'remote-source.tar.gz'),
sources_path=Path(remote_source['source_path']),
json_data=generate_request_json(
remote_source['remote_source'], sbom_data, json_env_data),
json_data=request_json,
json_env_data=json_env_data,
)
processed_remote_sources.append(remote_source_obj)
Expand Down
91 changes: 91 additions & 0 deletions atomic_reactor/utils/cachi2.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,14 @@
from typing import Any, Callable, Dict, Optional, Tuple, List
from pathlib import Path
import os.path
import urllib
# explicit submodule import: `import urllib` alone does not guarantee
# that urllib.parse is loaded
import urllib.parse

import git
from packageurl import PackageURL

from atomic_reactor import constants
from atomic_reactor.utils import retries

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -286,4 +291,90 @@ def clone_only(remote_source: Dict[str, Any]) -> bool:
if pkg_managers is not None and len(pkg_managers) == 0:
return True

# only git-submodule
if pkg_managers is not None and pkg_managers == ['git-submodule']:
return True

return False


def has_git_submodule_manager(remote_source: Dict[str, Any]) -> bool:
    """Return True when the remote source requests the git-submodule manager.

    A missing or explicitly-None "pkg_managers" entry counts as no managers.
    """
    managers = remote_source.get("pkg_managers")
    if not managers:
        return False
    return "git-submodule" in managers


def update_submodules(repopath: Path):
    """Initialize and update all git submodules of the repository at *repopath*.

    Uses a blob:none partial-clone filter to avoid fetching file contents
    that are not needed; retried on failure by retries.run_cmd.
    """
    retries.run_cmd(
        ["git", "submodule", "update", "--init", "--filter=blob:none"],
        cwd=str(repopath),
        universal_newlines=True,
        timeout=constants.GIT_CMD_TIMEOUT,
    )


def get_submodules_sbom(repo: git.Repo) -> List[Dict]:
    """Return SBOM library components for every git submodule of *repo*."""

    def _vcs_purl(pkg_name, repo_url, ref):
        """Build the most specific vcs purl for *repo_url* at *ref*.

        pkg:github / pkg:bitbucket when the host is recognized; otherwise
        pkg:generic with a percent-encoded ?vcs_url qualifier.

        :param str pkg_name: name of package
        :param str repo_url: url of git repository for package
        :param str ref: git ref of package
        :return: the PURL string of the Package object
        :rtype: str
        """
        repo_url = repo_url.rstrip("/")
        parsed = urllib.parse.urlparse(repo_url)

        host_types = {
            "github.com": "github",
            "bitbucket.org": "bitbucket",
        }
        purl_type = host_types.get(parsed.hostname, "generic")

        if purl_type == "generic":
            encoded = urllib.parse.quote(f"{repo_url}@{ref}", safe="")
            return f"pkg:generic/{pkg_name}?vcs_url={encoded}"

        # pkg:github and pkg:bitbucket share the same namespace/name layout
        namespace, repo_name = parsed.path.lstrip("/").rsplit("/", 1)
        if repo_name.endswith(".git"):
            repo_name = repo_name[:-len(".git")]
        return f"pkg:{purl_type}/{namespace.lower()}/{repo_name.lower()}@{ref}"

    components = []
    for submodule in repo.submodules:
        components.append({
            "type": "library",
            "name": submodule.name,
            "version": f"{submodule.url}#{submodule.hexsha}",
            "purl": _vcs_purl(submodule.name, submodule.url, submodule.hexsha),
        })

    return components


def get_submodules_request_json_deps(repo: git.Repo) -> List[Dict]:
    """Return request.json dependency entries for every submodule of *repo*."""
    dependencies = []
    for submodule in repo.submodules:
        dependencies.append({
            "type": "git-submodule",
            "name": submodule.name,
            "path": submodule.name,
            "version": f"{submodule.url}#{submodule.hexsha}",
        })

    return dependencies
8 changes: 5 additions & 3 deletions atomic_reactor/utils/retries.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def get_retrying_requests_session(client_statuses=HTTP_CLIENT_STATUS_RETRY,
max_tries=SUBPROCESS_MAX_RETRIES + 1, # total tries is N retries + 1 initial attempt
jitter=None, # use deterministic backoff, do not apply random jitter
)
def run_cmd(cmd: List[str], cleanup_cmd: List[str] = None) -> bytes:
def run_cmd(cmd: List[str], cleanup_cmd: List[str] = None, **params) -> bytes:
"""Run a subprocess command, retry on any non-zero exit status.
Whenever an attempt fails, the stdout and stderr of the failed command will be logged.
Expand All @@ -98,12 +98,14 @@ def run_cmd(cmd: List[str], cleanup_cmd: List[str] = None) -> bytes:
If a cleanup command is specified it'll be run on exception before retry.
:param params: optional params to be passed to subprocess.run function
:return: bytes, the combined stdout and stderr (if any) of the command
"""
logger.debug("Running %s", " ".join(cmd))

try:
process = subprocess.run(cmd, check=True, capture_output=True)
process = subprocess.run(cmd, check=True, capture_output=True, **params)
except subprocess.CalledProcessError as e:
logger.warning(
"%s failed:\nSTDOUT:\n%s\nSTDERR:\n%s",
Expand All @@ -114,7 +116,7 @@ def run_cmd(cmd: List[str], cleanup_cmd: List[str] = None) -> bytes:
if cleanup_cmd:
try:
logger.debug("Running %s", " ".join(cleanup_cmd))
subprocess.run(cleanup_cmd, check=True, capture_output=True)
subprocess.run(cleanup_cmd, check=True, capture_output=True, **params)
except subprocess.CalledProcessError as c_e:
logger.warning(
"Cleanup command: %s failed:\nSTDOUT:\n%s\nSTDERR:\n%s",
Expand Down
1 change: 1 addition & 0 deletions requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
backoff
dockerfile-parse>=0.0.13
flatpak-module-tools>=0.14
gitpython
jsonschema
paramiko>=3.4.0
PyYAML
Expand Down
6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ editables==0.5
# via hatchling
flatpak-module-tools==0.14
# via -r requirements.in
gitdb==4.0.12
# via gitpython
gitpython==3.1.44
# via -r requirements.in
googleapis-common-protos==1.60.0
# via
# opentelemetry-exporter-otlp-proto-grpc
Expand Down Expand Up @@ -176,6 +180,8 @@ six==1.16.0
# koji
# osbs-client
# python-dateutil
smmap==5.0.2
# via gitdb
tomli==2.0.1
# via hatchling
trove-classifiers==2023.8.7
Expand Down
69 changes: 69 additions & 0 deletions tests/utils/test_cachi2.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from pathlib import Path
from typing import Union

import git

from atomic_reactor.utils.cachi2 import (
SymlinkSandboxError,
convert_SBOM_to_ICM,
Expand All @@ -19,13 +21,19 @@
generate_request_json,
clone_only,
validate_paths,
has_git_submodule_manager,
)

import pytest

from unittest import mock


@pytest.fixture
def mocked_repo_submodules():
    """Mock submodules repo.

    NOTE(review): the fixture body is empty, so it yields None; presumably
    a placeholder for a mocked git.Repo carrying submodules — confirm
    whether tests consuming it were meant to be part of this change.
    """


@pytest.mark.parametrize(('input_remote_source', 'expected_cachi2'), [
pytest.param(
{"pkg_managers": ["gomod"]},
Expand Down Expand Up @@ -563,13 +571,27 @@ def test_generate_request_json():
True,
id="empty_list"
),
pytest.param(
{
"pkg_managers": ["git-submodule"]
},
True,
id="git_submodule"
),
pytest.param(
{
"pkg_managers": ["gomod"]
},
False,
id="gomod"
),
pytest.param(
{
"pkg_managers": ["gomod", "git-submodule"]
},
False,
id="gomod_and_git_submodule"
),
pytest.param(
{},
False,
Expand All @@ -588,6 +610,53 @@ def test_clone_only(remote_source, expected):
assert clone_only(remote_source) == expected


# Unlike clone_only (which only matches when git-submodule is the sole
# manager), has_git_submodule_manager must detect git-submodule even when
# it is combined with other package managers.
@pytest.mark.parametrize('remote_source,expected', [
    pytest.param(
        {
            "pkg_managers": []
        },
        False,
        id="empty_list"
    ),
    pytest.param(
        {
            "pkg_managers": ["git-submodule"]
        },
        True,
        id="git_submodule"
    ),
    pytest.param(
        {
            "pkg_managers": ["gomod"]
        },
        False,
        id="gomod"
    ),
    pytest.param(
        {
            "pkg_managers": ["gomod", "git-submodule"]
        },
        True,
        id="gomod_and_git_submodule"
    ),
    pytest.param(
        {},
        False,
        id="undefined"
    ),
    pytest.param(
        {
            "pkg_managers": None
        },
        False,
        id="explicit_none"
    ),
])
def test_has_git_submodule_manager(remote_source, expected):
    """Test if has_git_submodule_manager correctly detects git-submodule"""
    assert has_git_submodule_manager(remote_source) == expected


class Symlink(str):
"""
Use this to create symlinks via write_file_tree().
Expand Down

0 comments on commit a816fae

Please sign in to comment.