diff --git a/tools/all_repos.py b/tools/all_repos.py
new file mode 100644
index 0000000..256d69a
--- /dev/null
+++ b/tools/all_repos.py
@@ -0,0 +1,192 @@
+# https://packaging.python.org/en/latest/specifications/inline-script-metadata/
+# /// script
+# requires-python = ">=3.12"
+# dependencies = [
+#     "asks",
+#     "beautifulsoup4",
+#     "requests",
+#     "rich",
+#     "trio",
+# ]
+# ///
+"""GitHub organization / PyPI cross-referencing tool.
+
+This script lists the repositories of a set of GitHub organizations and
+cross-references them with the packages published under the PyPI "jupyter"
+org, reporting repos with a matching package, repos with no package, and
+packages with no matching repo.
+"""
+
+import os
+
+import asks
+import requests
+import trio
+from bs4 import BeautifulSoup
+from rich import print
+
+
+def get_packages(url):
+    # Send a GET request to the webpage with a custom user agent
+    headers = {"User-Agent": "python/request/jupyter"}
+    response = requests.get(url, headers=headers, allow_redirects=True)
+
+    if response.status_code != 200:
+        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
+        exit(1)
+
+    if "A required part of this site couldn’t load" in response.text:
+        print("Fastly is blocking us. Status code: 403")
+        exit(1)
+
+    # Parse the HTML content
+    soup = BeautifulSoup(response.content, "html.parser")
+
+    # Find all <h3> tags and accumulate their text in a list
+    h3_tags = [h3.get_text(strip=True) for h3 in soup.find_all("h3")]
+
+    # Sort the list of <h3> contents
+    h3_tags.sort()
+
+    if not h3_tags:
+        print("No packages found")
+        exit(1)
+    return h3_tags
+
+
+default_orgs = [
+    "binder-examples",
+    "binderhub-ci-repos",
+    "ipython",
+    "jupyter",
+    "jupyter-attic",
+    "jupyter-book",
+    "jupyter-governance",
+    "jupyter-incubator",
+    "jupyter-resources",
+    "jupyter-server",
+    "jupyter-standard",
+    "jupyter-standards",
+    "jupyter-widgets",
+    "jupyter-xeus",
+    "jupytercon",
+    "jupyterhub",
+    "jupyterlab",
+    "voila-dashboards",
+    "voila-gallery",
+    "pickleshare",
+]
+
+token = os.getenv("GH_TOKEN")
+if not token:
+    print("[red]Error: GH_TOKEN environment variable not set[/red]")
+    exit(1)
+
+headers = {
+    "Authorization": f"token {token}",
+    "Accept": "application/vnd.github.v3+json",
+}
+
+
+async def list_repos(orgs):
+    results = []
+    async with trio.open_nursery() as nursery:
+        for org in orgs:
+
+            async def _loc(results, org):
+                results.append(await list_repos_for_org(org))
+
+            nursery.start_soon(_loc, results, org)
+    # the nursery has waited for every task, so results is complete here
+    for org_repos in results:
+        for org, repo in org_repos:
+            yield org, repo
+
+
+async def list_repos_for_org(org):
+    reps = []
+    # paginate through the org's repositories (assumes at most ~900 repos)
+    for p in range(1, 10):
+        response = await asks.get(
+            f"https://api.github.com/orgs/{org}/repos?per_page=100&page={p}",
+            headers=headers,
+        )
+        response.raise_for_status()
+        repos = response.json()
+        for repo in repos:
+            reps.append((org, repo["name"]))
+        if len(repos) < 100:
+            break
+    return reps
+
+
+async def main():
+    packages = get_packages("https://pypi.org/org/jupyter/")
+    print(f"Found {len(packages)} packages in the PyPI jupyter org")
+
+    # map normalized (lowercase, underscores) names to canonical PyPI names
+    pypi_names = {p.lower().replace("-", "_"): p for p in packages}
+
+    todo = []
+    async for org, repo in list_repos(default_orgs):
+        lowname = repo.lower().replace("-", "_")
+        if lowname in pypi_names:
+            print(
+                f"{org}/{repo}".ljust(40),
+                f"https://pypi.org/project/{pypi_names[lowname]}",
+                " in jupyter org",
+            )
+            del pypi_names[lowname]
+        else:
+            todo.append((org, repo))
+
+    print()
+    print("check potentially matching PyPI names:")
+
+    targets = []
+    async with trio.open_nursery() as nursery:
+        for org, repo in todo:
+
+            async def _loc(targets, org, repo):
+                targets.append(
+                    (
+                        org,
+                        repo,
+                        (
+                            await asks.get(f"https://pypi.org/pypi/{repo}/json")
+                        ).status_code,
+                    )
+                )
+
+            nursery.start_soon(_loc, targets, org, repo)
+
+    corg = ""
+    for org, repo, status in sorted(targets):
+        if org != corg:
+            print()
+            corg = org
+        if status == 200:
+            print(
+                f"https://github.com/{org}/{repo}".ljust(70),
+                f"{status} for https://pypi.org/project/{repo}",
+            )
+
+    print()
+    print("repos with no PyPI package:")
+    corg = ""
+    for org, repo, status in sorted(targets):
+        if org != corg:
+            print()
+            corg = org
+        if status != 200:
+            print(f"https://github.com/{org}/{repo}")
+
+    print()
+    print("Packages with no repos:")
+    print(pypi_names)
+
+
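+# Usage sketch (hypothetical invocation; `uv run` reads the inline script
+# metadata above, and GH_TOKEN must be set):
+#   GH_TOKEN=... uv run tools/all_repos.py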
+trio.run(main)
diff --git a/tools/private_sec_report.py b/tools/private_sec_report.py
index 3e6cdbf..e77f2a0 100644
--- a/tools/private_sec_report.py
+++ b/tools/private_sec_report.py
@@ -72,11 +72,18 @@ async def get_private_report(session, org, repo):
     ) as repo_response:
         repo_info = await repo_response.json()
         archived = repo_info.get("archived", False)
+        private = repo_info.get("private", False)
     async with session.get(private_report_url, headers=headers) as response:
         if response.status == 200:
-            return org, repo, (await response.json()).get("enabled", False), archived
+            return (
+                org,
+                repo,
+                (await response.json()).get("enabled", False),
+                archived,
+                private,
+            )
         else:
-            return org, repo, False, archived
+            return org, repo, False, archived, private


 async def main():
@@ -90,7 +97,7 @@ async def main():
     results = await asyncio.gather(*tasks)

     prev_org = None
-    for org, repo, enabled, archived in results:
+    for org, repo, enabled, archived, private in results:
         if org != prev_org:
             print()
             print(f"[bold]{org}[/bold]")
@@ -98,6 +105,8 @@
         if enabled:
             print(f"  [green]{repo}: {enabled}[/green]")
         else:
+            if private:
+                print(f"  [yellow]{org}/{repo}: {enabled} (private)[/yellow]")
-            if archived:
+            elif archived:
                 print(f"  [yellow]{org}/{repo}: {enabled} (archived)[/yellow]")
             elif f"{org}/{repo}" in ignore_repos:
diff --git a/tools/tide.py b/tools/tide.py
new file mode 100644
index 0000000..464920a
--- /dev/null
+++ b/tools/tide.py
@@ -0,0 +1,126 @@
+# https://packaging.python.org/en/latest/specifications/inline-script-metadata/
+# /// script
+# requires-python = ">=3.12"
+# dependencies = [
+#     "requests",
+#     "rich",
+#     "beautifulsoup4",
+# ]
+# ///
+import sys
+
+import requests
+from bs4 import BeautifulSoup
+from rich import print
+from rich.table import Table
+
+
+def get_packages(url):
+    # Send a GET request to the webpage with a custom user agent
+    headers = {"User-Agent": "python/request/jupyter"}
+    response = requests.get(url, headers=headers, allow_redirects=True)
+
+    if response.status_code != 200:
+        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
+        exit(1)
+
+    if "A required part of this site couldn’t load" in response.text:
+        print(f"Fastly is blocking us for {url}. Status code: 403")
+        exit(1)
+
+    # Parse the HTML content
+    soup = BeautifulSoup(response.content, "html.parser")
+
+    # Find all <h3> tags and accumulate their text in a list
+    h3_tags = [h3.get_text(strip=True) for h3 in soup.find_all("h3")]
+
+    # Sort the list of <h3> contents
+    h3_tags.sort()
+
+    if not h3_tags:
+        print("No packages found")
+        exit(1)
+    return h3_tags
+
+
+def get_tidelift_data(packages):
+    packages_data = [{"platform": "pypi", "name": h3} for h3 in packages]
+
+    data = {"packages": packages_data}
+    res = requests.post(
+        "https://tidelift.com/api/depci/estimate/bulk_estimates", json=data
+    )
+
+    res.raise_for_status()
+
+    # Collect all package data for aligned printing
+    package_data = []
+    response_data = res.json()
+
+    for package in response_data:
+        name = package["name"]
+        lifted = package["lifted"]
+        estimated_money = package["estimated_money"]
+        package_data.append((name, lifted, estimated_money))
+
+    # add packages Tidelift did not report on at all
+    package_names = {p["name"] for p in response_data}
+    for package in packages:
+        if package not in package_names:
+            package_data.append((package, None, None))
+
+    # Create a table for aligned output
+    table = Table(show_header=True, header_style="bold magenta")
+    table.add_column("Package Name")
+    table.add_column("Estimated Money")
+    table.add_column("Lifted")
+
+    def maybefloat(x):
+        if x is None:
+            return 0
+        try:
+            return float(x)
+        except (TypeError, ValueError):
+            return 0
+
+    # sort lifted True first, then None (unknown), then False,
+    # then by estimated amount (descending), then by name
+    lifted_rank = {True: 0, None: 1, False: 2}
+    package_data.sort(
+        key=lambda x: (lifted_rank.get(x[1], 2), -maybefloat(x[2]), x[0])
+    )
+    for name, lifted, estimated_money in package_data:
+        if lifted:
+            table.add_row(name, "-- need login --", f"[green]{lifted}[/green]")
+        else:
+            table.add_row(name, str(estimated_money), f"[red]{lifted}[/red]")
+
+    print(table)
+
+
+if __name__ == "__main__":
+    args = sys.argv[1:]
+    packages = []
+    while args:
+        if args[0] == "--org":
+            url = f"https://pypi.org/org/{args[1]}/"
+            packages += get_packages(url)
+            args = args[2:]
+        elif args[0] == "--user":
+            url = f"https://pypi.org/user/{args[1]}/"
+            packages += get_packages(url)
+            args = args[2:]
+        elif args[0] == "--packages":
+            packages += args[1:]
+            args = []
+        else:
+            print(
+                "Invalid argument. Please use either --org ORG, --user USER or --packages PACKAGE1 PACKAGE2 ..."
+            )
+            exit(1)
+    get_tidelift_data(packages)
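+
+# Usage sketch (hypothetical invocations; `uv run` reads the inline script
+# metadata above, but any interpreter with the dependencies installed works):
+#   uv run tools/tide.py --org jupyter
+#   uv run tools/tide.py --packages ipython traitlets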