Skip to content

Commit

Permalink
add new script for analyzing readmes (#15)
Browse files Browse the repository at this point in the history
  • Loading branch information
jaydeluca authored Jan 9, 2025
1 parent e70ae84 commit 2c35bda
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 1 deletion.
2 changes: 1 addition & 1 deletion github_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def _get(self, url, params=None):
except Exception as e:
print(e)

def get_most_recent_commit(self, repo: str, timestamp: str, branch: str) -> requests.models.Response:
def get_most_recent_commit(self, repo: str, timestamp: str, branch: str):
api_url = f"{self.base_url}/repos/{repo}/commits"

params = {
Expand Down
119 changes: 119 additions & 0 deletions instrumentation_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
from datetime import datetime
from typing import List, Optional, Set, Tuple

import pandas as pd

from github_client import GithubClient


class Instrumentation:
    """Record of one instrumentation module and which variants it provides.

    The attributes are plain and mutable so callers can flip the variant
    flags after construction.
    """

    def __init__(self, name: str, has_javaagent: bool = False,
                 has_library: bool = False, parent: Optional[str] = None):
        """Create a record.

        Args:
            name: Name of the instrumentation.
            has_javaagent: Whether a ``javaagent`` variant was seen.
            has_library: Whether a ``library`` variant was seen.
            parent: Path of the enclosing directory, or ``None`` when the
                instrumentation has no parent.
        """
        self.name = name
        self.has_javaagent = has_javaagent
        self.has_library = has_library
        self.parent = parent

    def __repr__(self) -> str:
        # Keeps failed assertions and debug prints readable.
        return (
            f"{type(self).__name__}(name={self.name!r}, "
            f"has_javaagent={self.has_javaagent!r}, "
            f"has_library={self.has_library!r}, parent={self.parent!r})"
        )


def analyze_instrumentation(file_list: List[str]) -> List[Instrumentation]:
    """Group ``.../javaagent`` and ``.../library`` paths by instrumentation.

    For a path with more than two segments, the second-to-last segment is the
    instrumentation name and everything before it is the parent
    (``"spring/spring-webmvc/spring-webmvc-5.3/library"`` yields name
    ``"spring-webmvc-5.3"`` and parent ``"spring/spring-webmvc"``). For
    shorter paths the first segment is the name and the parent is ``None``.

    Args:
        file_list: Directory paths, one per variant directory.

    Returns:
        One ``Instrumentation`` per distinct name, in first-seen order.
        Entries are keyed by name only, so the parent of the last path seen
        for a given name wins.
    """
    instrumentations = {}
    for path in file_list:
        parts = path.split("/")
        if len(parts) > 2:
            inst_name = parts[-2]
            # Build the parent from the leading segments instead of splitting
            # the string on the name: splitting breaks when the name also
            # occurs earlier in the path (e.g. "kafka/kafka/library").
            parent = "/".join(parts[:-2]).rstrip("/")
        else:
            inst_name = parts[0]
            parent = None

        inst = instrumentations.get(inst_name)
        if inst is None:
            inst = Instrumentation(inst_name)
            instrumentations[inst_name] = inst

        if path.endswith("/javaagent"):
            inst.has_javaagent = True
        elif path.endswith("/library"):
            inst.has_library = True

        inst.parent = parent

    return list(instrumentations.values())


def parse_readme(file_list: List[str]) -> Tuple[Set[str], Set[str]]:
    """Find which instrumentations have a README per variant directory.

    A path counts only when its last segment is ``README.md`` (any case), the
    segment before it is exactly ``javaagent`` or ``library`` (any case), and
    there is a further segment before that naming the instrumentation.

    Args:
        file_list: README file paths to inspect.

    Returns:
        A ``(javaagent_names, library_names)`` pair of sets holding the
        instrumentation names, in their original case.
    """
    javaagent_has_readme: Set[str] = set()
    library_has_readme: Set[str] = set()

    for path in file_list:
        parts = path.split("/")
        # Require <name>/<variant>/README.md. Shorter paths have no name
        # segment and would otherwise wrap around via negative indexing.
        if len(parts) < 3 or parts[-1].lower() != "readme.md":
            continue

        # Compare the whole segment so "my-library/README.md" is not taken
        # for a "library" variant directory.
        variant = parts[-2].lower()
        if variant == "javaagent":
            javaagent_has_readme.add(parts[-3])
        elif variant == "library":
            library_has_readme.add(parts[-3])

    return javaagent_has_readme, library_has_readme


def _percentage(part: int, whole: int) -> int:
    """Return ``part`` as a truncated whole percentage of ``whole`` (0 if ``whole`` is 0)."""
    return int(part / whole * 100) if whole else 0


def main():
    """Print a README-coverage report for the instrumentation modules.

    Fetches the repository tree through ``GithubClient``, collects the
    ``javaagent``/``library`` directories and README files under
    ``instrumentation/``, then prints totals, percentages and a Markdown
    checklist of library instrumentations.
    """
    repo = "open-telemetry/opentelemetry-java-instrumentation"
    prefix = "instrumentation/"
    client = GithubClient()
    # Tomorrow's date at midnight; presumably an upper bound for the commit
    # lookup -- confirm against GithubClient.get_most_recent_commit.
    today = (datetime.now().date() + pd.Timedelta(days=1)).strftime(
        "%Y-%m-%dT%H:%M:%SZ")

    commit = client.get_most_recent_commit(repo, today, "main")
    repo_files = client.get_repository_at_commit(
        repository=repo,
        commit_sha=commit
    )

    instrumentations = []
    readmes = []

    for entry in repo_files["tree"]:
        path = entry["path"]
        # Only entries under instrumentation/ are relevant. Skipping the rest
        # keeps unrelated READMEs out of the counts.
        if not path.startswith(prefix):
            continue
        # Strip only the leading prefix; str.replace would also remove any
        # later "instrumentation/" occurrence inside the path.
        relative = path[len(prefix):]

        if path.lower().endswith("readme.md"):
            readmes.append(relative)

        if entry["type"] == "tree" \
                and path.endswith(("/javaagent", "/library")) \
                and "/io/opentelemetry/javaagent" not in path \
                and "-common/" not in path:
            instrumentations.append(relative)

    inst_list = analyze_instrumentation(instrumentations)
    javaagent_has_readme, library_has_readme = parse_readme(readmes)
    library: List[Instrumentation] = [i for i in inst_list if i.has_library]
    javaagent: List[Instrumentation] = [i for i in inst_list if i.has_javaagent]

    total = len(inst_list)
    javaagent_count = len(javaagent)
    library_count = len(library)

    print(f"{total} instrumentation items")
    print("\n")
    print(f"{javaagent_count} javaagent instrumentations ({_percentage(javaagent_count, total)}%)")
    print(f"Readmes: {len(javaagent_has_readme)}\n\n")

    print(f"{library_count} library instrumentations ({_percentage(library_count, total)}%)")
    print(f"Readmes: {len(library_has_readme)}")

    print("\nLibraries:\n")
    for i in library:
        full_inst_name = f"{i.parent}/{i.name}" if i.parent else i.name
        link = f"https://github.com/open-telemetry/opentelemetry-java-instrumentation/tree/main/instrumentation/{full_inst_name}/library"
        print(f"{'- [x]' if i.name in library_has_readme else '- [ ]'} [{i.name}]({link})")


# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
55 changes: 55 additions & 0 deletions instrumentation_analysis_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import unittest

from instrumentation_analysis import analyze_instrumentation, parse_readme


class TestInstrumentationAnalysis(unittest.TestCase):
    """Unit tests for ``analyze_instrumentation`` and ``parse_readme``."""

    def test_parse_file_list(self):
        """Paths with one parent directory yield that directory as parent."""
        file_list = [
            "akka/akka-actor-2.3/javaagent",
            "akka/akka-actor-fork-join-2.5/library"
        ]

        result = analyze_instrumentation(file_list)

        self.assertEqual(len(result), 2)

        self.assertEqual(result[0].name, "akka-actor-2.3")
        self.assertTrue(result[0].has_javaagent)
        self.assertFalse(result[0].has_library)
        self.assertEqual(result[0].parent, "akka")

        self.assertEqual(result[1].name, "akka-actor-fork-join-2.5")
        self.assertFalse(result[1].has_javaagent)
        self.assertTrue(result[1].has_library)
        self.assertEqual(result[1].parent, "akka")

    def test_parse_file_list_with_two_layers(self):
        """Paths nested two levels deep keep the full parent path."""
        file_list = [
            "spring/spring-webmvc/spring-webmvc-5.3/library",
        ]

        result = analyze_instrumentation(file_list)

        self.assertEqual(len(result), 1)

        self.assertEqual(result[0].name, "spring-webmvc-5.3")
        self.assertFalse(result[0].has_javaagent)
        self.assertTrue(result[0].has_library)
        self.assertEqual(result[0].parent, "spring/spring-webmvc")

    def test_parse_readme(self):
        """README paths are attributed to the right variant set."""
        # Named readme_paths rather than `input` to avoid shadowing the builtin.
        readme_paths = [
            'spring/spring-webmvc/spring-webmvc-5.3/library/README.md',
            'aws-lambda/aws-lambda-core-1.0/javaagent/README.md',
            'ktor/ktor-1.0/library/README.md',
            'java-http-client/library/README.md'
        ]

        javaagents_with_readmes, libraries_with_readmes = parse_readme(readme_paths)

        # Compare whole sets so unexpected extra entries fail the test too.
        self.assertEqual(javaagents_with_readmes, {'aws-lambda-core-1.0'})
        self.assertEqual(
            libraries_with_readmes,
            {'spring-webmvc-5.3', 'ktor-1.0', 'java-http-client'},
        )

0 comments on commit 2c35bda

Please sign in to comment.