-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
messy initial script, needs cleaning up
- Loading branch information
0 parents
commit 2a46ae0
Showing
11 changed files
with
377 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
cache/* | ||
!cache/.gitkeep | ||
.idea/* | ||
|
||
*DS_Store |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import os | ||
import json | ||
|
||
|
||
class FileCache:
    """A small key/value cache persisted as a single JSON object on disk.

    Every operation re-reads the file at ``location``; writes rewrite the
    whole file. Keys and values must be JSON-serialisable.
    """

    def __init__(self, location):
        """Remember ``location`` and create an empty cache file if none exists.

        Missing parent directories are created so the cache can live in a
        directory that has not been set up yet.
        """
        self.location = location
        if not os.path.exists(location):
            parent = os.path.dirname(location)
            if parent:
                os.makedirs(parent, exist_ok=True)
            self._store({})

    def _load(self):
        """Return the cache contents, or an empty dict if the file is gone."""
        try:
            with open(self.location, 'r', encoding='utf-8') as file:
                return json.load(file)
        except FileNotFoundError:
            # The file may have been removed by delete_cache(); treat that
            # as an empty cache rather than an error.
            return {}

    def _store(self, cache):
        """Overwrite the cache file with ``cache``."""
        with open(self.location, 'w', encoding='utf-8') as file:
            json.dump(cache, file)

    def add_to_cache(self, key, value):
        """Store ``value`` under ``key``, replacing any previous entry."""
        cache = self._load()
        cache[key] = value
        self._store(cache)

    def retrieve_value(self, key):
        """Return the value cached under ``key``, or ``None`` if absent.

        Prints a "cache hit" message when a non-None value is found.
        """
        result = self._load().get(key)
        if result is not None:
            print(f"cache hit. key:{key}")
        return result

    def delete_cache(self):
        """Remove the cache file from disk if it exists."""
        if os.path.exists(self.location):
            os.remove(self.location)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import requests | ||
import os | ||
|
||
# File suffixes of the source files this tool counts.
EXTENSIONS = [
    ".java",
    ".groovy",
]


def matches_extensions(path: str):
    """Return True when ``path`` ends with one of the suffixes in EXTENSIONS."""
    return any(path.endswith(suffix) for suffix in EXTENSIONS)
|
||
|
||
def matches_directory(path: str):
    """Return True when ``path`` lies under the top-level ``instrumentation/`` directory."""
    required_prefix = "instrumentation/"
    return path.startswith(required_prefix)
|
||
|
||
def matches_meta(item):
    """Return True for tree entries that are blobs whose path contains "test".

    ``item`` is a mapping with at least a ``"type"`` key; ``"path"`` is only
    read when the entry is a blob.
    """
    if item["type"] != "blob":
        return False
    return "test" in item["path"]
|
||
|
||
def parse_data(payload):
    """Extract the paths of matching files from a git tree payload.

    ``payload["tree"]`` is iterated in order; an entry is kept when it
    passes ``matches_meta``, ``matches_extensions`` and ``matches_directory``.

    Returns a dict of the form ``{"files": [path, ...]}``.
    """
    selected_paths = [
        entry["path"]
        for entry in payload["tree"]
        if matches_meta(entry)
        and matches_extensions(entry["path"])
        and matches_directory(entry["path"])
    ]
    return {"files": selected_paths}
|
||
|
||
class GithubClient(object):
    """Thin wrapper around a ``requests`` session for the GitHub REST API."""

    def __init__(self):
        """Create the HTTP session, authenticating only if GITHUB_TOKEN is set."""
        # The token is optional: os.environ.get returns None when the
        # variable is unset, so test truthiness instead of calling len().
        token = os.environ.get("GITHUB_TOKEN")
        self.session = requests.Session()
        if token:
            self.session.headers.update({'Authorization': f'Bearer {token}'})
        self.base_url = 'https://api.github.com'

    def _get(self, url, params=None):
        """GET ``url`` and return the response, or ``None`` if the request failed.

        Transport-level failures are printed rather than raised; callers
        must handle a ``None`` result.
        """
        try:
            return self.session.get(url, params=params)
        except requests.RequestException as e:
            print(e)
            return None

    def get_most_recent_commit(self, repo, timestamp) -> "str | None":
        """Return the sha of the newest commit in ``repo`` at or before ``timestamp``.

        Returns ``None`` when the request fails, the API answers with a
        non-200 status, or no commit is returned.
        """
        api_url = f"{self.base_url}/repos/{repo}/commits"

        params = {
            "per_page": 1,
            "until": timestamp,
            "order": "desc"
        }

        response = self._get(api_url, params=params)
        if response is None:
            # _get already reported the underlying error.
            return None

        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            return None

        commits = response.json()
        if not commits:
            print("No commits found.")
            return None

        return commits[0]['sha']

    def get_repository_at_commit(self, repository, commit_sha):
        """Return the filtered file listing of ``repository`` at ``commit_sha``.

        Fetches the recursive git tree and passes it through ``parse_data``.
        Returns ``None`` when the request fails or the status is not 200.
        """
        api_url = f"{self.base_url}/repos/{repository}/git/trees/{commit_sha}?recursive=1"

        response = self._get(api_url)
        if response is None:
            # _get already reported the underlying error.
            return None

        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            return None

        return parse_data(response.json())
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
from collections import defaultdict | ||
from typing import List | ||
from datetime import datetime, timedelta | ||
import matplotlib.pyplot as plt | ||
|
||
|
||
from file_cache import FileCache | ||
from github_client import GithubClient | ||
|
||
COMMIT_CACHE_FILE = 'cache/date-commit-cache.json' | ||
REPO_CACHE_FILE = 'cache/repo-cache.json' | ||
|
||
# File suffixes that are tallied per snapshot.
EXTENSIONS = [
    ".java",
    ".groovy",
]


def count_by_file_type(files: List[str]) -> dict:
    """Count how many entries of ``files`` end with each suffix in EXTENSIONS.

    Returns a ``defaultdict(int)`` keyed by suffix; suffixes with no
    matching file are absent until looked up.
    """
    tally = defaultdict(int)
    for name in files:
        for suffix in (ext for ext in EXTENSIONS if name.endswith(ext)):
            tally[suffix] += 1
    return tally
|
||
|
||
def get_commit_by_date(gh_client: GithubClient, cache: FileCache, repository, date):
    """Return the commit sha for ``date``, consulting ``cache`` before GitHub.

    A sha fetched from ``gh_client`` is written to ``cache`` under ``date``;
    a falsy lookup result is returned as-is and never cached.
    """
    cached_sha = cache.retrieve_value(date)
    if cached_sha:
        return cached_sha

    fetched_sha = gh_client.get_most_recent_commit(repository, date)
    if fetched_sha:
        cache.add_to_cache(date, fetched_sha)
    return fetched_sha
|
||
|
||
def get_repository_by_commit(gh_client: GithubClient, cache: FileCache, repository, commit):
    """Return the file listing of ``repository`` at ``commit``, using ``cache``.

    Returns ``None`` when ``commit`` is falsy or the listing cannot be
    fetched. Only successful fetches are cached, mirroring
    ``get_commit_by_date``, so a failed lookup is retried on the next run
    instead of being stored as a null entry.
    """
    if not commit:
        # No commit was resolved for this snapshot; there is nothing to look
        # up and no meaningful cache key.
        return None

    find_repo = cache.retrieve_value(commit)
    if find_repo:
        return find_repo

    find_repo = gh_client.get_repository_at_commit(repository, commit)
    if find_repo:
        cache.add_to_cache(commit, find_repo)

    return find_repo
|
||
|
||
def get_dates_since(date_str):
    """List timestamps every 14 days from ``date_str`` up to today.

    ``date_str`` is parsed as ``YYYY-MM-DD``. Each returned string is
    formatted as ``%Y-%m-%dT%H:%M:%SZ`` from a plain date, so the time part
    is always midnight. The list is empty when ``date_str`` is in the
    future.
    """
    first_day = datetime.strptime(date_str, "%Y-%m-%d").date()
    today = datetime.now().date()
    total_days = (today - first_day).days

    return [
        (first_day + timedelta(days=offset)).strftime("%Y-%m-%dT%H:%M:%SZ")
        for offset in range(0, total_days + 1, 14)
    ]
|
||
|
||
if __name__ == '__main__':
    # Entry point: sample the repository every two weeks, count matching
    # test files per language at each snapshot, and plot the trend.
    print("starting")
    repo = "open-telemetry/opentelemetry-java-instrumentation"

    client = GithubClient()

    commit_cache = FileCache(COMMIT_CACHE_FILE)
    repo_cache = FileCache(REPO_CACHE_FILE)

    # One row per snapshot that produced at least one counted file.
    rows = {}
    for snapshot in get_dates_since("2022-11-15"):
        try:
            commit = get_commit_by_date(gh_client=client, cache=commit_cache, date=snapshot, repository=repo)
            repo_files = get_repository_by_commit(gh_client=client, cache=repo_cache, repository=repo, commit=commit)
            count = count_by_file_type(repo_files["files"])
        except Exception as e:
            print(f"Error for {snapshot}, {e}")
            continue

        if not count:
            continue

        rows[snapshot] = {
            "date": snapshot,
            "java": count[".java"],
            "groovy": count[".groovy"]
        }

    # Split the rows into parallel series; only the YYYY-MM-DD part of each
    # timestamp is used as the x-axis label.
    series = list(rows.values())
    dates = [row["date"][:10] for row in series]
    java_counts = [row["java"] for row in series]
    groovy_counts = [row["groovy"] for row in series]

    plt.plot(dates, java_counts, label='Java')
    plt.plot(dates, groovy_counts, label='Groovy')
    plt.xlabel('Date')
    plt.ylabel('Count')
    plt.title('Test Classes by Lang in Instrumentation Directory')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Code Migration Tracker | ||
|
||
Goal: Given a repository, a timeframe, and any filtering rules, track a goal over time. | ||
|
||
## Setup | ||
|
||
A GitHub token is not required, but it is recommended because you will get rate limited if you make too many unauthenticated calls.
|
||
``` | ||
export GITHUB_TOKEN="insert-your-token" | ||
pip install -r requirements.txt | ||
python main.py | ||
``` | ||
|
||
## Example: | ||
|
||
In the `open-telemetry/opentelemetry-java-instrumentation` repository, track the conversion of tests from groovy to java | ||
in the `instrumentation` directory. | ||
|
||
Output: | ||
|
||
data:image/s3,"s3://crabby-images/f6fba/f6fba1ff8b26e47323bb561c33c33a3417ab99b0" alt="Example" | ||
|
||
## Approach | ||
|
||
- Query GitHub for point-in-time snapshots, using the most recent commit at each of a series of dates spanning the timeframe
- Cache this data locally to avoid repeated api calls | ||
- Parse out counts of files that match criteria at each snapshot | ||
- Generate Graph to show results over time frame |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
requests | ||
unittest | ||
matplotlib |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import unittest | ||
|
||
from file_cache import FileCache | ||
|
||
|
||
class FileCacheTestCase(unittest.TestCase):
    """Round-trip tests for FileCache against a throwaway ``test-cache`` file."""

    def setUp(self):
        # Drop any file left behind by an earlier aborted run so each test
        # starts from an empty cache.
        FileCache("test-cache").delete_cache()
        self.cache = FileCache("test-cache")
        # Registered up front so the file is removed even when an assertion
        # fails part-way through a test.
        self.addCleanup(self.cache.delete_cache)

    def test_add(self):
        self.cache.add_to_cache("test", "value")

        self.assertEqual("value", self.cache.retrieve_value("test"))
        self.assertIsNone(self.cache.retrieve_value("test2"))

    def test_does_not_exist_returns_none(self):
        self.assertIsNone(self.cache.retrieve_value("test"))
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
{ | ||
"sha": "1e9b47b4c35f9046cec3718cadbc7410fdd9ffe1", | ||
"url": "https://api.github.com/repos/open-telemetry/opentelemetry-java-instrumentation/git/trees/1e9b47b4c35f9046cec3718cadbc7410fdd9ffe1", | ||
"tree": [ | ||
{ | ||
"path": ".editorconfig", | ||
"mode": "100644", | ||
"type": "blob", | ||
"sha": "201ab30485cae46b70f9abe2c575fd7629114e04", | ||
"size": 33579, | ||
"url": "https://api.github.com/repos/open-telemetry/opentelemetry-java-instrumentation/git/blobs/201ab30485cae46b70f9abe2c575fd7629114e04" | ||
}, | ||
{ | ||
"path": ".gitattributes", | ||
"mode": "100644", | ||
"type": "blob", | ||
"sha": "3982c9ad9a59f5608d66a3f8851f235e122e486e", | ||
"size": 92, | ||
"url": "https://api.github.com/repos/open-telemetry/opentelemetry-java-instrumentation/git/blobs/3982c9ad9a59f5608d66a3f8851f235e122e486e" | ||
}, | ||
{ | ||
"path": ".githooks", | ||
"mode": "040000", | ||
"type": "tree", | ||
"sha": "d83d121e48b61cd9de2a7c78940cf88fc0c07c05", | ||
"url": "https://api.github.com/repos/open-telemetry/opentelemetry-java-instrumentation/git/trees/d83d121e48b61cd9de2a7c78940cf88fc0c07c05" | ||
}, | ||
{ | ||
"path": "instrumentation/internal/internal-class-loader/javaagent-integration-tests/src/main/java/instrumentation/TestFailableCallable.java", | ||
"mode": "100644", | ||
"type": "blob", | ||
"sha": "7ce45826964d8da9d33d196de1265abff5aa28d2", | ||
"size": 268, | ||
"url": "https://api.github.com/repos/open-telemetry/opentelemetry-java-instrumentation/git/blobs/7ce45826964d8da9d33d196de1265abff5aa28d2" | ||
}, | ||
{ | ||
"path": "instrumentation/internal/internal-class-loader/javaagent-integration-tests/src/main/java/instrumentation/TestInstrumentationModule.java", | ||
"mode": "100644", | ||
"type": "blob", | ||
"sha": "e4044ababaa7ff7bd635158f8d90ee5c3894ed21", | ||
"size": 1986, | ||
"url": "https://api.github.com/repos/open-telemetry/opentelemetry-java-instrumentation/git/blobs/e4044ababaa7ff7bd635158f8d90ee5c3894ed21" | ||
}, | ||
{ | ||
"path": "instrumentation/internal/internal-class-loader/javaagent-integration-tests/src/main/resources", | ||
"mode": "040000", | ||
"type": "tree", | ||
"sha": "77f51c56d577df798aebb81e60b8a084f4412966", | ||
"url": "https://api.github.com/repos/open-telemetry/opentelemetry-java-instrumentation/git/trees/77f51c56d577df798aebb81e60b8a084f4412966" | ||
}, | ||
{ | ||
"path": "instrumentation/internal/internal-class-loader/javaagent-integration-tests/src/main/resources/test-resources", | ||
"mode": "040000", | ||
"type": "tree", | ||
"sha": "b2ec26ccf707818532735ed86f6df72528443c56", | ||
"url": "https://api.github.com/repos/open-telemetry/opentelemetry-java-instrumentation/git/trees/b2ec26ccf707818532735ed86f6df72528443c56" | ||
}, | ||
{ | ||
"path": "instrumentation/internal/internal-class-loader/javaagent-integration-tests/src/main/resources/test-resources/test-resource-2.txt", | ||
"mode": "100644", | ||
"type": "blob", | ||
"sha": "d6613f5f8b58eb6a88ee386ea140364c8645005c", | ||
"size": 12, | ||
"url": "https://api.github.com/repos/open-telemetry/opentelemetry-java-instrumentation/git/blobs/d6613f5f8b58eb6a88ee386ea140364c8645005c" | ||
} | ||
], | ||
"truncated": false | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import unittest | ||
import json | ||
|
||
from github_client import parse_data | ||
|
||
|
||
class ParseDataTestCase(unittest.TestCase):
    """Checks parse_data against the recorded tree payload fixture."""

    def test_clean_payload(self):
        # Load the recorded git-tree response used as input.
        with open("test_mocks/tree_data.json", 'r') as fixture:
            payload = json.load(fixture)
        parsed = parse_data(payload)

        # Only the two Java files under instrumentation/ whose path contains
        # "test" are expected to be returned.
        expects = {
            "instrumentation/internal/internal-class-loader/javaagent-integration-tests/src/main/java/instrumentation/TestFailableCallable.java",
            "instrumentation/internal/internal-class-loader/javaagent-integration-tests/src/main/java/instrumentation/TestInstrumentationModule.java",
        }
        self.assertEqual(set(parsed['files']), expects)
|
||
|
||
|