diff --git a/Makefile b/Makefile
index 5bc1885..f021d7c 100644
--- a/Makefile
+++ b/Makefile
@@ -16,6 +16,8 @@ update-example:
 	pip3 install -r requirements.txt
 	python3 main.py -r "open-telemetry/opentelemetry-java-instrumentation" -l "java,groovy" -s "2022-11-15" -i 14 -o "./media/example_output.png"
 	python3 main.py -r "open-telemetry/opentelemetry-java-instrumentation" -l "groovy" -s "2022-11-15" -i 14 -o "./media/example_output2.png"
+	python3 count_by_instrumentation.py -r "open-telemetry/opentelemetry-java-instrumentation" -l "groovy" -o "./media/example_pie_output.png"
+
 
 
 .PHONY: all
diff --git a/count_by_instrumentation.py b/count_by_instrumentation.py
new file mode 100644
index 0000000..e28432a
--- /dev/null
+++ b/count_by_instrumentation.py
@@ -0,0 +1,97 @@
+from datetime import datetime, timezone
+from typing import List
+import pandas as pd
+import seaborn as sns
+
+import matplotlib.pyplot as plt
+import argparse
+
+from data_filter import DataFilter
+from utilities import count_by_language_and_file_extension
+
+from github_client import GithubClient
+
+
+class App:
+    """Pair a GithubClient with a DataFilter built from the given criteria."""
+
+    def __init__(self, languages: List[str], path_prefix: str, keyword: str):
+        self.client = GithubClient()
+        self.data_filter = DataFilter(languages=languages,
+                                      path_prefix=path_prefix, keyword=keyword)
+
+    def get_commit_by_date(self, repository, date):
+        """Ask the client for the most recent commit for `date` ("main" is passed as the third argument)."""
+        return self.client.get_most_recent_commit(repository, date, "main")
+
+    def get_repository_by_commit(self, repository, commit):
+        """Fetch the repository data at `commit` and run it through the data filter."""
+        repo_data = self.client.get_repository_at_commit(repository, commit)
+        repo_data = self.data_filter.parse_data(repo_data)
+
+        return repo_data
+
+
+def main(args):
+    """Count matching files per instrumentation module and draw a pie chart.
+
+    Prints the counts as a markdown table followed by a total row, then saves
+    the chart to args.output when given, otherwise shows it interactively.
+    """
+    app = App(
+        languages=[args.language],
+        path_prefix="instrumentation/",
+        keyword="test"
+    )
+
+    # Use the current UTC time: the "Z" suffix denotes UTC, and formatting a
+    # bare date would always produce a 00:00:00 timestamp.
+    today = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    commit = app.get_commit_by_date(date=today, repository=args.repo)
+    repo_files = app.get_repository_by_commit(
+        repository=args.repo,
+        commit=commit
+    )
+    count = count_by_language_and_file_extension(files=repo_files["files"],
+                                                 languages=[args.language])
+
+    df = pd.DataFrame(list(count.items()), columns=['Key', 'Value'])
+    df = df.sort_values(by='Value', key=lambda col: col.astype(int), ascending=False)
+
+    sns.set_theme()
+    colors = sns.color_palette('pastel')[0:len(df)]
+
+    # Create a pie chart
+    explode = [0.05] * len(df)  # this will "explode" each slice from the pie
+    df.set_index('Key')['Value'].plot.pie(autopct='%1.0f%%', colors=colors,
+                                          explode=explode)
+
+    plt.title(f'Remaining {args.language} files by Instrumentation')
+    plt.ylabel('')
+
+    print(df.to_markdown(index=False))
+    print(f"| Total | {df['Value'].sum()} |")
+
+    if args.output is not None:
+        plt.savefig(args.output)
+    else:
+        plt.show()
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='Show Pie chart of file count in test folders')
+    parser.add_argument("-r", "--repo",
+                        help="Repository name. "
+                             "ex: open-telemetry/opentelemetry-java-instrumentation",
+                        required=True)
+    parser.add_argument("-l", "--language",
+                        help="Language to analyze. "
+                             "ex: groovy",
+                        required=True)
+    parser.add_argument("-o", "--output",
+                        help="File name to output graph to (leave blank and no file is generated). "
+                             "ex: pie-chart-counts.png")
+    arguments = parser.parse_args()
+    main(arguments)
diff --git a/main.py b/main.py
index 0d98f72..65204e2 100644
--- a/main.py
+++ b/main.py
@@ -122,7 +122,7 @@ def main(args):
 
     plt.xlabel('Date', fontsize=14)
     plt.ylabel('Count', fontsize=14)
-    plt.title('Test File Count by Language in Instrumentation Directory', fontsize=16)
+    plt.title('Test File Count in Instrumentation Directory', fontsize=16)
     plt.xticks(rotation=45)
     plt.legend()
 
diff --git a/media/example_output.png b/media/example_output.png
index 593fb62..77ef629 100644
Binary files a/media/example_output.png and b/media/example_output.png differ
diff --git a/media/example_output2.png b/media/example_output2.png
index 40bd4ab..21b201a 100644
Binary files a/media/example_output2.png and b/media/example_output2.png differ
diff --git a/media/example_pie_output.png b/media/example_pie_output.png
new file mode 100644
index 0000000..f7b0ea1
Binary files /dev/null and b/media/example_pie_output.png differ
diff --git a/readme.md b/readme.md
index bf9ec42..6655708 100644
--- a/readme.md
+++ b/readme.md
@@ -84,6 +84,59 @@ Output:
 
 ![Example](./media/benchmark_output.png)
 
+## Detailed Count by Instrumentation Module
+
+### Arguments
+
+| Argument   | Command        | Description                                    | Example                                                    |
+|------------|----------------|------------------------------------------------|------------------------------------------------------------|
+| Repository | -r, --repo     | Repository name.                               | --repo "open-telemetry/opentelemetry-java-instrumentation" |
+| Language   | -l, --language | Language to get count for                      | --language "groovy"                                        |
+| Output     | -o, --output   | (Optional) Location where file should be saved | --output "./media/example.png"                             |
+
+### Example Usage:
+
+In the `open-telemetry/opentelemetry-java-instrumentation` repository, analyze the files in test directories in the
+`instrumentation` directory and output counts by module.
+
+`python count_by_instrumentation.py -r "open-telemetry/opentelemetry-java-instrumentation" -l "groovy"`
+
+Output:
+
+| Key               | Value |
+|:------------------|------:|
+| spring            |    52 |
+| jaxrs             |    37 |
+| servlet           |    23 |
+| restlet           |    22 |
+| couchbase         |    18 |
+| aws-sdk           |    17 |
+| ratpack           |    16 |
+| elasticsearch     |    15 |
+| play              |    15 |
+| jaxws             |    15 |
+| vertx             |    14 |
+| mongo             |    10 |
+| jdbc              |     8 |
+| apache-dubbo-2.7  |     7 |
+| jaxrs-client      |     5 |
+| netty             |     5 |
+| apache-httpclient |     3 |
+| opentelemetry-api |     3 |
+| grizzly-2.3       |     3 |
+| grails-3.0        |     3 |
+| undertow-1.4      |     3 |
+| kafka             |     3 |
+| internal          |     2 |
+| dropwizard        |     2 |
+| hibernate         |     1 |
+| rediscala-1.8     |     1 |
+| spymemcached-2.12 |     1 |
+| twilio-6.6        |     1 |
+| Total             |   305 |
+
+![Example](./media/example_pie_output.png)
+
 ## Approach
 
 - Query Github for point in time snapshots based on commits around times spanning a timeframe
diff --git a/requirements.txt b/requirements.txt
index 60c2ba1..e854594 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,5 @@ pytest
 pytest-cov
 ruff
 seaborn
-pandas
\ No newline at end of file
+pandas
+tabulate
\ No newline at end of file
diff --git a/utilities.py b/utilities.py
index 651fef9..3c52e7f 100644
--- a/utilities.py
+++ b/utilities.py
@@ -1,6 +1,6 @@
 from datetime import datetime, timedelta
 from collections import defaultdict
-from typing import List
+from typing import List, Dict
 
 
 def get_dates_between(start_date_str, end_date, interval):
@@ -37,6 +37,25 @@ def count_by_file_extension(files: List[str], languages: List[str]) -> dict:
     return file_counts
 
 
+def count_by_language_and_file_extension(files: List[str], languages: List[str]) -> Dict[str, int]:
+    """Count files per instrumentation module whose extension is in `languages`.
+
+    The module name is the second "/"-separated path segment (e.g. "spring" in
+    "instrumentation/spring/..."); paths with fewer than three segments are
+    skipped. The extension is the text after the last "." of the file name.
+    """
+    counts = defaultdict(int)
+    for file in files:
+        file_parts = file.split('/')
+        if len(file_parts) < 3:
+            continue
+        instrumentation = file_parts[1]
+        extension = file_parts[-1].split('.')[-1]
+        if extension in languages:
+            counts[instrumentation] += 1
+    return counts
+
+
 def convert_to_plot(input_dict: dict, items):
     result = {}
     dates = []