Skip to content

Commit

Permalink
add pie chart with detailed breakdown (#11)
Browse files Browse the repository at this point in the history
  • Loading branch information
jaydeluca authored Aug 10, 2024
1 parent ee3101d commit cbef6a3
Show file tree
Hide file tree
Showing 10 changed files with 160 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .ruff.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# Never enforce `E501` (line length violations).
ignore = ["E501"]
lint.ignore = ["E501"]
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@ test:

.PHONY: lint
lint:
ruff --format=github --select=E9,F63,F7,F82 --target-version=py37 .
ruff --format=github --target-version=py37 .
ruff check

# Regenerate the example images referenced from the readme.
# The .PHONY name must match the target name exactly, otherwise the target is
# not treated as phony and a file called `update-example` would suppress it.
.PHONY: update-example
update-example:
	pip3 install -r requirements.txt
	python3 main.py -r "open-telemetry/opentelemetry-java-instrumentation" -l "java,groovy" -s "2022-11-15" -i 14 -o "./media/example_output.png"
	python3 main.py -r "open-telemetry/opentelemetry-java-instrumentation" -l "groovy" -s "2022-11-15" -i 14 -o "./media/example_output2.png"
	python3 count_by_instrumentation.py -r "open-telemetry/opentelemetry-java-instrumentation" -l "groovy" -o "./media/example_pie_output.png"



.PHONY: all
Expand Down
86 changes: 86 additions & 0 deletions count_by_instrumentation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from datetime import datetime
from typing import List
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt
import argparse

from data_filter import DataFilter
from utilities import count_by_language_and_file_extension

from github_client import GithubClient


class App:
    """Pairs a GitHub client with a data filter for repository snapshots.

    The constructor arguments are forwarded unchanged to ``DataFilter``;
    how they are interpreted is defined there.
    """

    def __init__(self, languages: List[str], path_prefix: str, keyword: str):
        self.client = GithubClient()
        self.data_filter = DataFilter(
            languages=languages,
            path_prefix=path_prefix,
            keyword=keyword,
        )

    def get_commit_by_date(self, repository, date):
        """Return the client's most recent commit for *repository* at *date* on ``main``."""
        branch = "main"
        return self.client.get_most_recent_commit(repository, date, branch)

    def get_repository_by_commit(self, repository, commit):
        """Fetch *repository* at *commit* and return it after the data filter has parsed it."""
        snapshot = self.client.get_repository_at_commit(repository, commit)
        return self.data_filter.parse_data(snapshot)


def main(args):
    """Print per-instrumentation file counts as a table and draw them as a pie chart.

    Looks up a commit for ``args.repo``, loads the filtered file list at that
    commit, counts the files matching ``args.language`` per instrumentation
    module, prints the counts as a markdown table followed by a total row,
    and then renders a pie chart.

    Args:
        args: Parsed command-line namespace with ``repo``, ``language`` and
            ``output`` attributes. When ``output`` is ``None`` the chart is
            shown interactively, otherwise it is saved to that path.
    """
    app = App(
        languages=[args.language],
        path_prefix="instrumentation/",
        keyword="test"
    )

    # NOTE(review): .date() drops the time of day, so this always renders as
    # "<today>T00:00:00Z" using the local date. Confirm that excluding
    # commits made later today is intended.
    today = datetime.now().date().strftime("%Y-%m-%dT%H:%M:%SZ")

    commit = app.get_commit_by_date(date=today, repository=args.repo)
    repo_files = app.get_repository_by_commit(
        repository=args.repo,
        commit=commit
    )
    count = count_by_language_and_file_extension(files=repo_files["files"],
                                                 languages=[args.language])

    df = pd.DataFrame(list(count.items()), columns=['Key', 'Value'])
    df = df.sort_values(by='Value', key=lambda col: col.astype(int), ascending=False)

    # Emit the table before touching matplotlib so the textual result is
    # available even when there is nothing to plot.
    print(df.to_markdown(index=False))
    print(f"| Total | {df['Value'].sum()} |")

    if df.empty:
        # An empty frame has no numeric data and pandas refuses to plot it;
        # zero remaining files is a valid outcome, not an error.
        print(f"No {args.language} files found; skipping pie chart.")
        return

    sns.set_theme()
    colors = sns.color_palette('pastel')[0:len(df)]

    # Create a pie chart
    explode = [0.05] * len(df)  # this will "explode" each slice from the pie
    df.set_index('Key')['Value'].plot.pie(autopct='%1.0f%%', colors=colors,
                                          explode=explode)

    plt.title(f'Remaining {args.language} files by Instrumentation')
    plt.ylabel('')

    if args.output is not None:
        plt.savefig(args.output)
    else:
        plt.show()


if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(
        description='Show Pie chart of file count in test folders')
    parser.add_argument("-r", "--repo",
                        help="Repository name. "
                             "ex: open-telemetry/opentelemetry-java-instrumentation",
                        required=True)
    # Adjacent string literals are concatenated verbatim, so each fragment
    # must carry its own trailing separator before the "ex:" part.
    parser.add_argument("-l", "--language",
                        help="Language to analyze. "
                             "ex: groovy",
                        required=True)
    parser.add_argument("-o", "--output",
                        help="File name to output graph to (leave blank and no file is generated). "
                             "ex: pie-chart-counts.png")
    arguments = parser.parse_args()
    main(arguments)
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def main(args):

plt.xlabel('Date', fontsize=14)
plt.ylabel('Count', fontsize=14)
plt.title('Test File Count by Language in Instrumentation Directory', fontsize=16)
plt.title('Test File Count in Instrumentation Directory', fontsize=16)
plt.xticks(rotation=45)

plt.legend()
Expand Down
Binary file modified media/example_output.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified media/example_output2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added media/example_pie_output.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
53 changes: 53 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,59 @@ Output:

![Example](./media/benchmark_output.png)

## Detail Count by Instrumentation module

### Arguments

| Argument | Command | Description | Example |
|------------|----------------|------------------------------------------------|------------------------------------------------------------|
| Repository | -r, --repo | Repository name. | --repo "open-telemetry/opentelemetry-java-instrumentation" |
| Language | -l, --language | Language to get count for | --language "groovy" |
| Output | -o, --output | (Optional) Location where file should be saved | --output "./media/example.png" |

### Example Usage:

In the `open-telemetry/opentelemetry-java-instrumentation` repository, analyze the files in test directories under the
`instrumentation` directory and output counts by module.

`python count_by_instrumentation.py -r "open-telemetry/opentelemetry-java-instrumentation" -l "groovy"`

Output:

| Key | Value |
|:------------------|------:|
| spring | 52 |
| jaxrs | 37 |
| servlet | 23 |
| restlet | 22 |
| couchbase | 18 |
| aws-sdk | 17 |
| ratpack | 16 |
| elasticsearch | 15 |
| play | 15 |
| jaxws | 15 |
| vertx | 14 |
| mongo | 10 |
| jdbc | 8 |
| apache-dubbo-2.7 | 7 |
| jaxrs-client | 5 |
| netty | 5 |
| apache-httpclient | 3 |
| opentelemetry-api | 3 |
| grizzly-2.3 | 3 |
| grails-3.0 | 3 |
| undertow-1.4 | 3 |
| kafka | 3 |
| internal | 2 |
| dropwizard | 2 |
| hibernate | 1 |
| rediscala-1.8 | 1 |
| spymemcached-2.12 | 1 |
| twilio-6.6 | 1 |
| Total | 305 |

![Example](./media/example_pie_output.png)

## Approach

- Query Github for point in time snapshots based on commits around times spanning a timeframe
Expand Down
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ matplotlib
argparse
pytest
pytest-cov
ruff
ruff==0.5.7
seaborn
pandas
pandas
tabulate
14 changes: 13 additions & 1 deletion utilities.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datetime import datetime, timedelta
from collections import defaultdict
from typing import List
from typing import List, Dict


def get_dates_between(start_date_str, end_date, interval):
Expand Down Expand Up @@ -37,6 +37,18 @@ def count_by_file_extension(files: List[str], languages: List[str]) -> dict:
return file_counts


def count_by_language_and_file_extension(files: List[str], languages: List[str]) -> Dict[str, int]:
    """Count matching files per instrumentation module.

    Each path is split on ``/``; the second segment is used as the
    instrumentation name and the text after the last ``.`` of the final
    segment as the extension. Paths with fewer than three segments are
    ignored, as are file names that contain no ``.`` at all (so a file
    literally named ``groovy`` is not mistaken for a ``.groovy`` file).

    Args:
        files: Slash-separated file paths.
        languages: Extensions to count, without the leading dot.

    Returns:
        Mapping of instrumentation name to the number of matching files.
        Modules with no matching files are absent from the mapping.
    """
    counts = defaultdict(int)
    for path in files:
        segments = path.split('/')
        if len(segments) < 3:
            continue
        # rpartition yields an empty separator when the name has no dot,
        # which lets us tell "no extension" apart from a real match.
        _, dot, extension = segments[-1].rpartition('.')
        if dot and extension in languages:
            counts[segments[1]] += 1
    return counts

def convert_to_plot(input_dict: dict, items):
result = {}
dates = []
Expand Down

0 comments on commit cbef6a3

Please sign in to comment.