Skip to content

Commit

Permalink
perf: Fix performance issue when assigning previous version
Browse files Browse the repository at this point in the history
Assigning the previous version checked the same commits multiple times whenever a merge commit was encountered.
This led to severe performance issues on commit graphs with many merge commits.

Instead of traversing the commit graph a second time, finding the previous version is now part of the commit-to-version grouping.

PR-78: #78
  • Loading branch information
chme authored Mar 23, 2024
1 parent 58a4d88 commit f35c88b
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 21 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ actions = \
docs-deploy \
format \
help \
profile \
release \
run \
setup \
Expand Down
39 changes: 39 additions & 0 deletions duties.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,19 @@
import os
import sys
from contextlib import contextmanager
from cProfile import Profile
from importlib.metadata import version as pkgversion
from pathlib import Path
from pstats import SortKey, Stats
from tempfile import TemporaryDirectory
from typing import TYPE_CHECKING, Iterator

from duty import duty
from duty.callables import coverage, lazy, mkdocs, mypy, pytest, ruff, safety

from git_changelog import Changelog
from git_changelog.commit import AngularConvention

if TYPE_CHECKING:
from duty.context import Context

Expand Down Expand Up @@ -283,3 +289,36 @@ def update_config(filename: str) -> None:

for filename in ("launch.json", "settings.json", "tasks.json"):
ctx.run(update_config, args=[filename], title=f"Update .vscode/{filename}")


@duty
def profile(ctx: Context, merge: int = 15) -> None:
    """Profile the parsing and grouping of commits.

    A temporary Git repository is created and a branch is merged into `main`
    the requested number of times. Building a `Changelog` from that repository
    is then run under `cProfile`, and the statistics are printed sorted by time.

    Parameters:
        merge: Number of times to merge a branch in the temporary repository.
    """
    try:
        from tests.helpers import GitRepo
    except ModuleNotFoundError:
        # Put the directory containing this file on the import path, then retry.
        import sys

        here = Path(__file__).parent
        sys.path.insert(0, str(here))
        from tests.helpers import GitRepo

    def merge_branches(repo: GitRepo, branch: str = "feat", times: int = 15) -> None:
        """Create `branch`, commit on it, merge it into `main` and delete it, `times` times."""
        for _round in range(times):
            repo.branch(branch)
            repo.checkout(branch)
            repo.commit(f"feat: {branch}")
            repo.checkout("main")
            repo.merge(branch)
            repo.git("branch", "-d", branch)

    with TemporaryDirectory() as workdir:
        repo_path = Path(workdir) / "repo"
        repo = GitRepo(repo_path)
        ctx.run(merge_branches, args=(repo, "feat", merge), title="Creating temporary repository")

        with Profile() as profiler:
            Changelog(repo.path, convention=AngularConvention)
            # Each `Stats` method returns the same object, so the steps can be applied one by one.
            stats = Stats(profiler)
            stats.strip_dirs()
            stats.sort_stats(SortKey.TIME)
            stats.print_stats()
38 changes: 17 additions & 21 deletions src/git_changelog/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,6 @@ def __init__(
v_list, v_dict = self._group_commits_by_version()
self.versions_list = v_list
self.versions_dict = v_dict
self._assign_previous_versions()

# TODO: remove at some point
if bump_latest:
Expand Down Expand Up @@ -411,6 +410,7 @@ def _group_commits_by_version(self) -> tuple[list[Version], dict[str, Version]]:
"""
versions_dict: dict[str, Version] = {}
versions_list: list[Version] = []
previous_versions: dict[str, str] = {}

# Iterate in reversed order (oldest to newest tag) to assign commits to the first version they were released with.
for tag_commit in reversed(self.tag_commits):
Expand All @@ -421,13 +421,20 @@ def _group_commits_by_version(self) -> tuple[list[Version], dict[str, Version]]:

# Find all commits for this version by following the commit graph.
version.add_commit(tag_commit)
previous_semver: SemverVersion | None = None
next_commits = tag_commit.parent_commits # Always new: we can mutate it.
while next_commits:
next_commit = next_commits.pop(0)
if not next_commit.tag and not next_commit.version:
if next_commit.tag:
semver, _ = parse_version(next_commit.tag)
if not previous_semver or semver.compare(previous_semver) > 0:
previous_semver = semver
previous_versions[version.tag] = next_commit.tag
elif not next_commit.version:
version.add_commit(next_commit)
next_commits.extend(next_commit.parent_commits)

self._assign_previous_versions(versions_dict, previous_versions)
return versions_list, versions_dict

def _create_version(self, commit: Commit) -> Version:
Expand All @@ -437,7 +444,7 @@ def _create_version(self, commit: Commit) -> Version:
version.url = self.provider.get_tag_url(tag=commit.version)
return version

def _assign_previous_versions(self) -> None:
def _assign_previous_versions(self, versions_dict: dict[str, Version], previous_versions: dict[str, str]) -> None:
"""Assign each version its previous version and create the compare URL.
The previous version is defined as the version with the highest semantic version,
Expand All @@ -446,25 +453,14 @@ def _assign_previous_versions(self) -> None:
If no previous version is found, either because it is the first commit or
due to the commit filter excluding it, the compare URL is created with the
first commit (oldest).
"""
for version in self.versions_list:
next_commits = version.commits[0].parent_commits # Always new: we can mutate it.
previous_semver: SemverVersion | None = None
previous_version = ""
while next_commits:
next_commit = next_commits.pop(0)
if next_commit.tag:
semver, _ = parse_version(next_commit.tag)
if not previous_semver or semver.compare(previous_semver) > 0:
previous_semver = semver
previous_version = next_commit.tag
else:
next_commits.extend(next_commit.parent_commits)
if not previous_version:
previous_version = version.commits[-1].hash

version.previous_version = self.versions_dict.get(previous_version)
Arguments:
versions_dict: A dictionary of versions with the tag name as keys.
previous_versions: A dictionary mapping each version tag to the tag of its previous version.
"""
for version in versions_dict.values():
previous_version = previous_versions.get(version.tag, version.commits[-1].hash)
version.previous_version = versions_dict.get(previous_version)
if version.previous_version:
version.previous_version.next_version = version
if self.provider:
Expand Down
1 change: 1 addition & 0 deletions tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(self, repo: Path) -> None:
repo: Path to the git repository.
"""
self.path = repo
self.path.mkdir(parents=True, exist_ok=True)
self.git("init", "-b", "main")
self.git("config", "user.name", "dummy")
self.git("config", "user.email", "dummy@example.com")
Expand Down

0 comments on commit f35c88b

Please sign in to comment.