-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_docgen_cli.py
More file actions
124 lines (93 loc) · 4.08 KB
/
run_docgen_cli.py
File metadata and controls
124 lines (93 loc) · 4.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# 📄 File: run_docgen_cli.py
from group_chunks import group_related_files
from build_dependency_graph import build_dependency_graph
from parse_all import parse_all_files
from docgen_from_github import checkout_branch, detect_languages, clone_repo, get_repo_branches
from utils.file_filters import filter_chunks_with_gpt, get_chunk_group_map, convert_chunks_to_list_of_sets
from generate_prompt_from_chunk import generate_prompt_from_chunk
from summarizers.summarize_group_with_gpt import summarize_files_with_gpt
def run_docgen(github_url):
    """Clone a GitHub repository and generate documentation artifacts for it.

    Pipeline: clone repo -> list branches -> detect languages -> build a
    dependency graph -> group related files into chunks -> parse file
    structures -> GPT-filter chunks -> GPT-summarize -> assemble output.

    Args:
        github_url: URL of the GitHub repository to clone and document.

    Returns:
        dict with keys:
            "branches":  branch names reported by ``get_repo_branches``
            "chunks":    list of lists of file paths (one list per chunk)
            "groups":    group map produced by ``group_related_files``
            "summaries": list of dicts with "file", "summary", "oneline"
    """
    repo = clone_repo(github_url)
    branches = get_repo_branches(repo)
    files_by_language = detect_languages(repo)

    dep_graph = build_dependency_graph(files_by_language, repo)
    chunks, group_map = group_related_files(dep_graph)

    all_structures = parse_all_files(files_by_language, repo)
    structure_lookup = {item["file"]: item for item in all_structures}

    chunks = filter_chunks_with_gpt(chunks, structure_lookup)
    gpt_summaries = summarize_files_with_gpt(chunks, structure_lookup, repo)
    # Normalize Windows path separators so lookups below work cross-platform.
    gpt_summary_map = {
        entry["file"].replace("\\", "/"): entry.get("summary", "No content available")
        for entry in gpt_summaries
    }

    summaries = []
    json_chunks = []
    for chunk in chunks:
        chunk_files = list(chunk)
        json_chunks.append(chunk_files)
        chunk_structures = [structure_lookup[f] for f in chunk_files if f in structure_lookup]

        # Best-effort prompt generation: a failure on one chunk must not
        # abort the whole run, so fall back to a placeholder instead.
        try:
            prompt = generate_prompt_from_chunk(chunk_structures)
            pretty_prompt = "\n".join(line.strip() for line in prompt.strip().split("\n"))
        except Exception:  # deliberate broad catch for best-effort fallback
            pretty_prompt = "No content available"

        # Attach a 1-line summary only for single-file chunks; multi-file
        # chunks have no single obvious source for a one-liner.
        oneline_summary = None
        if len(chunk_files) == 1:
            normalized_file = chunk_files[0].replace("\\", "/")
            oneline_summary = gpt_summary_map.get(normalized_file, "No content available")

        summaries.append({
            "file": chunk_files,
            "summary": pretty_prompt,
            "oneline": oneline_summary,
        })

    return {
        "branches": branches,
        "chunks": json_chunks,
        "groups": group_map,
        "summaries": summaries,
    }
def run_docgen_for_existing_repo(branch_name):
    """Re-run documentation generation on an already-cloned repository.

    Expects the repo to exist at the hard-coded path ``cloned_repo``
    (presumably the location used by an earlier clone — TODO confirm it
    matches ``clone_repo``'s destination). Switches to ``branch_name``,
    then runs the same chunk/summarize pipeline as ``run_docgen``.

    Args:
        branch_name: Name of the branch to check out before processing.

    Returns:
        dict with keys:
            "branch_used": the branch name that was checked out
            "chunks":      list of lists of file paths (one list per chunk)
            "groups":      group map produced by ``group_related_files``
            "summaries":   list of dicts with "file", "summary", "oneline"
    """
    repo_path = "cloned_repo"
    # Switch to the specified branch before scanning the working tree.
    checkout_branch(repo_path, branch_name)

    files_by_language = detect_languages(repo_path)
    dep_graph = build_dependency_graph(files_by_language, repo_path)
    chunks, group_map = group_related_files(dep_graph)

    all_structures = parse_all_files(files_by_language, repo_path)
    structure_lookup = {item["file"]: item for item in all_structures}

    chunks = filter_chunks_with_gpt(chunks, structure_lookup)
    gpt_summaries = summarize_files_with_gpt(chunks, structure_lookup, repo_path)
    # Normalize Windows path separators so lookups below work cross-platform.
    gpt_summary_map = {
        entry["file"].replace("\\", "/"): entry.get("summary", "No content available")
        for entry in gpt_summaries
    }

    summaries = []
    json_chunks = []
    for chunk in chunks:
        chunk_files = list(chunk)
        json_chunks.append(chunk_files)
        chunk_structures = [structure_lookup[f] for f in chunk_files if f in structure_lookup]

        # Best-effort prompt generation: a failure on one chunk must not
        # abort the whole run, so fall back to a placeholder instead.
        try:
            prompt = generate_prompt_from_chunk(chunk_structures)
            pretty_prompt = "\n".join(line.strip() for line in prompt.strip().split("\n"))
        except Exception:  # deliberate broad catch for best-effort fallback
            pretty_prompt = "No content available"

        # Attach a 1-line summary only for single-file chunks; multi-file
        # chunks have no single obvious source for a one-liner.
        oneline_summary = None
        if len(chunk_files) == 1:
            normalized_file = chunk_files[0].replace("\\", "/")
            oneline_summary = gpt_summary_map.get(normalized_file, "No content available")

        summaries.append({
            "file": chunk_files,
            "summary": pretty_prompt,
            "oneline": oneline_summary,
        })

    return {
        "branch_used": branch_name,
        "chunks": json_chunks,
        "groups": group_map,
        "summaries": summaries,
    }