-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_docgen_cli.py
More file actions
124 lines (93 loc) · 4.08 KB
/
run_docgen_cli.py
File metadata and controls
124 lines (93 loc) · 4.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# 📄 File: run_docgen_cli.py
from group_chunks import group_related_files
from build_dependency_graph import build_dependency_graph
from parse_all import parse_all_files
from docgen_from_github import checkout_branch, detect_languages, clone_repo, get_repo_branches
from utils.file_filters import filter_chunks_with_gpt, get_chunk_group_map, convert_chunks_to_list_of_sets
from generate_prompt_from_chunk import generate_prompt_from_chunk
from summarizers.summarize_group_with_gpt import summarize_files_with_gpt
def run_docgen(github_url):
    """Clone a GitHub repository and generate documentation artifacts for it.

    Pipeline: clone repo -> list branches -> detect languages -> build a
    dependency graph -> group related files into chunks -> parse file
    structures -> GPT-filter chunks -> GPT-summarize -> assemble output.

    Args:
        github_url: URL of the GitHub repository to clone and document.

    Returns:
        dict with keys:
            "branches":  branch names reported by ``get_repo_branches``
            "chunks":    list of lists of file paths (one list per chunk)
            "groups":    group map produced by ``group_related_files``
            "summaries": list of dicts with "file", "summary", "oneline"
    """
    repo = clone_repo(github_url)
    branches = get_repo_branches(repo)
    files_by_language = detect_languages(repo)

    dep_graph = build_dependency_graph(files_by_language, repo)
    chunks, group_map = group_related_files(dep_graph)

    all_structures = parse_all_files(files_by_language, repo)
    structure_lookup = {item["file"]: item for item in all_structures}

    chunks = filter_chunks_with_gpt(chunks, structure_lookup)
    gpt_summaries = summarize_files_with_gpt(chunks, structure_lookup, repo)
    # Normalize Windows path separators so lookups below work cross-platform.
    gpt_summary_map = {
        entry["file"].replace("\\", "/"): entry.get("summary", "No content available")
        for entry in gpt_summaries
    }

    summaries = []
    json_chunks = []
    for chunk in chunks:
        chunk_files = list(chunk)
        json_chunks.append(chunk_files)
        chunk_structures = [structure_lookup[f] for f in chunk_files if f in structure_lookup]

        # Best-effort prompt generation: a failure on one chunk must not
        # abort the whole run, so fall back to a placeholder instead.
        try:
            prompt = generate_prompt_from_chunk(chunk_structures)
            pretty_prompt = "\n".join(line.strip() for line in prompt.strip().split("\n"))
        except Exception:  # deliberate broad catch for best-effort fallback
            pretty_prompt = "No content available"

        # Attach a 1-line summary only for single-file chunks; multi-file
        # chunks have no single obvious source for a one-liner.
        oneline_summary = None
        if len(chunk_files) == 1:
            normalized_file = chunk_files[0].replace("\\", "/")
            oneline_summary = gpt_summary_map.get(normalized_file, "No content available")

        summaries.append({
            "file": chunk_files,
            "summary": pretty_prompt,
            "oneline": oneline_summary,
        })

    return {
        "branches": branches,
        "chunks": json_chunks,
        "groups": group_map,
        "summaries": summaries,
    }
def run_docgen_for_existing_repo(branch_name):
    """Re-run documentation generation on an already-cloned repository.

    Expects the repo to exist at the hard-coded path ``cloned_repo``
    (presumably the location used by an earlier clone — TODO confirm it
    matches ``clone_repo``'s destination). Switches to ``branch_name``,
    then runs the same chunk/summarize pipeline as ``run_docgen``.

    Args:
        branch_name: Name of the branch to check out before processing.

    Returns:
        dict with keys:
            "branch_used": the branch name that was checked out
            "chunks":      list of lists of file paths (one list per chunk)
            "groups":      group map produced by ``group_related_files``
            "summaries":   list of dicts with "file", "summary", "oneline"
    """
    repo_path = "cloned_repo"
    # Switch to the specified branch before scanning the working tree.
    checkout_branch(repo_path, branch_name)

    files_by_language = detect_languages(repo_path)
    dep_graph = build_dependency_graph(files_by_language, repo_path)
    chunks, group_map = group_related_files(dep_graph)

    all_structures = parse_all_files(files_by_language, repo_path)
    structure_lookup = {item["file"]: item for item in all_structures}

    chunks = filter_chunks_with_gpt(chunks, structure_lookup)
    gpt_summaries = summarize_files_with_gpt(chunks, structure_lookup, repo_path)
    # Normalize Windows path separators so lookups below work cross-platform.
    gpt_summary_map = {
        entry["file"].replace("\\", "/"): entry.get("summary", "No content available")
        for entry in gpt_summaries
    }

    summaries = []
    json_chunks = []
    for chunk in chunks:
        chunk_files = list(chunk)
        json_chunks.append(chunk_files)
        chunk_structures = [structure_lookup[f] for f in chunk_files if f in structure_lookup]

        # Best-effort prompt generation: a failure on one chunk must not
        # abort the whole run, so fall back to a placeholder instead.
        try:
            prompt = generate_prompt_from_chunk(chunk_structures)
            pretty_prompt = "\n".join(line.strip() for line in prompt.strip().split("\n"))
        except Exception:  # deliberate broad catch for best-effort fallback
            pretty_prompt = "No content available"

        # Attach a 1-line summary only for single-file chunks; multi-file
        # chunks have no single obvious source for a one-liner.
        oneline_summary = None
        if len(chunk_files) == 1:
            normalized_file = chunk_files[0].replace("\\", "/")
            oneline_summary = gpt_summary_map.get(normalized_file, "No content available")

        summaries.append({
            "file": chunk_files,
            "summary": pretty_prompt,
            "oneline": oneline_summary,
        })

    return {
        "branch_used": branch_name,
        "chunks": json_chunks,
        "groups": group_map,
        "summaries": summaries,
    }