11package mcp.code.analysis.service
22
33import java.io.File
4+ import kotlin.text.lines
45import org.slf4j.Logger
56import org.slf4j.LoggerFactory
67
/**
 * Per-language markers used by the code summarizer to recognise definitions and comments.
 *
 * @property definitionPattern regex that a trimmed line must fully match to count as a definition
 * @property commentPrefixes prefixes that identify a single-line comment
 * @property blockCommentStart token that opens a block comment
 * @property blockCommentEnd token that closes a block comment
 */
data class LanguagePatterns(
    val definitionPattern: Regex,
    val commentPrefixes: List<String>,
    val blockCommentStart: String,
    val blockCommentEnd: String,
)
14+
/**
 * Immutable accumulator threaded through the line-by-line summarization fold.
 *
 * @property lines the summary lines collected so far
 * @property inCommentBlock whether the line being processed lies inside an open block comment
 */
data class State(
    val lines: List<String> = emptyList(),
    val inCommentBlock: Boolean = false,
)
16+
717/* *
818 * Responsible for analyzing the structure of a codebase. Identifies files, directories, and their respective metadata
919 * such as size, language, imports, and declarations.
@@ -23,37 +33,182 @@ data class CodeAnalyzer(
fun analyzeStructure(repoDir: File): Map<String, Any> {
    // The root directory is handed to processDirectory together with its own absolute path.
    val rootPath = repoDir.absolutePath
    return processDirectory(repoDir, rootPath)
}
2434
/**
 * Collects summarized code snippets from the repository.
 *
 * Only files with a supported source extension are considered, and any file whose path *relative to
 * the repository root* contains "test" (case-insensitive) is skipped. Each remaining file is read in
 * full and condensed with [summarizeCodeContent].
 *
 * @param repoDir The root directory of the repository
 * @param maxLines The maximum number of lines to include per file summary
 * @return List of code summaries with metadata, one entry per included file
 */
fun collectSummarizedCodeSnippets(repoDir: File, maxLines: Int = 100): List<String> {
    // Built once instead of once per file.
    val supportedExtensions = setOf("kt", "java", "scala", "py", "rb", "js", "ts", "go", "c", "cpp", "rs")
    val repoRoot = repoDir.absoluteFile

    return findCodeFiles(repoDir)
        // Resolve the path relative to the repository with the platform's own separator, so the result
        // never keeps a leading "\" on Windows.
        .map { file -> file to file.absoluteFile.toRelativeString(repoRoot) }
        .filter { (file, relativePath) ->
            // Match "test" against the repo-relative path only: a checkout living under a directory such
            // as /tmp/test-run must not cause every file to be excluded.
            file.extension.lowercase() in supportedExtensions && !relativePath.contains("test", ignoreCase = true)
        }
        .map { (file, relativePath) ->
            val lang = getLanguageFromExtension(file.extension)
            summarizeCodeContent(relativePath, lang, file.readText(), maxLines)
        }
        .also { snippets ->
            logger.info("Collected ${snippets.size} summarized snippets from ${repoDir.absolutePath}")
            // Joining every snippet can be large; skip the work entirely unless debug output is enabled.
            if (logger.isDebugEnabled) {
                logger.debug("Snippets Found:\n${snippets.joinToString("\n")}\n")
            }
        }
}
5663
/**
 * Summarizes the content of a file by keeping only definition lines and comments.
 *
 * The content is scanned line by line. A line is kept when it lies inside a block comment, when its
 * trimmed form fully matches the language's definition pattern, or when it starts with a comment
 * marker. Definition lines are emitted trimmed, and a definition that opens a brace without closing
 * it gets a closing brace appended so the summary stays balanced. Comment lines are emitted verbatim.
 * Scanning stops contributing lines once [maxLines] lines have been collected.
 *
 * Block comments are tracked explicitly: a comment that opens and closes on the same line does not
 * leave the scanner inside a comment, and delimiters that are identical for start and end (Python
 * triple quotes) toggle correctly.
 *
 * @param path The path of the file, used only in the summary header
 * @param language The language of the file (case-insensitive); unknown values use a generic pattern set
 * @param content The content of the file
 * @param maxLines The maximum number of lines to include in the summary
 * @return A fenced block headed by "### File: <path>" containing the retained lines
 */
fun summarizeCodeContent(path: String, language: String, content: String, maxLines: Int = 250): String {
    val (definitionPattern, commentPrefixes, blockCommentStart, blockCommentEnd) =
        languagePatternsFor(language.lowercase())

    val isCommentLine: (String) -> Boolean = { trimmed ->
        commentPrefixes.any { trimmed.startsWith(it) } ||
            trimmed.startsWith(blockCommentStart) ||
            trimmed.startsWith("*")
    }

    // Appends a closing brace to a definition that opens a body on the same line.
    val closeDanglingBrace: (String) -> String = { trimmed ->
        if (trimmed.contains("{") && !trimmed.contains("}")) "$trimmed }" else trimmed
    }

    val finalState =
        content.lines().fold(State()) { state, line ->
            val trimmed = line.trim()
            when {
                // Cap reached: ignore the remaining lines.
                state.lines.size >= maxLines -> state

                // Inside a block comment every line is comment text and is kept verbatim; the block
                // ends on the line that carries the terminator.
                state.inCommentBlock -> State(state.lines + line, !trimmed.contains(blockCommentEnd))

                else -> {
                    // A block comment stays open only when its terminator is not found after the opening
                    // token on the same line. Searching past the opener also handles delimiters that are
                    // identical for start and end.
                    val opensBlock =
                        trimmed.startsWith(blockCommentStart) &&
                            trimmed.indexOf(blockCommentEnd, startIndex = blockCommentStart.length) < 0
                    val kept =
                        when {
                            trimmed.matches(definitionPattern) -> state.lines + closeDanglingBrace(trimmed)
                            isCommentLine(trimmed) -> state.lines + line
                            else -> state.lines
                        }
                    State(kept, opensBlock)
                }
            }
        }

    // Assemble the result directly rather than via trimMargin on an interpolated template: trimMargin
    // would also strip a leading "|" margin from any summarized line that happens to start with one.
    return listOf(
        "### File: $path",
        "~~~$language",
        finalState.lines.joinToString("\n"),
        "~~~",
    ).joinToString("\n")
}

/** Returns the definition/comment markers for [language], which must already be lower-cased. */
private fun languagePatternsFor(language: String): LanguagePatterns =
    when (language) {
        "kotlin" ->
            LanguagePatterns(
                Regex(
                    """(class|interface|object|enum class|data class|sealed class|fun|val|var|const|typealias|annotation class).*"""
                ),
                listOf("//"),
                "/*",
                "*/",
            )

        "scala" ->
            LanguagePatterns(
                Regex(
                    """(class|object|trait|case class|case object|def|val|var|lazy val|type|implicit|sealed|abstract|override|package object).*"""
                ),
                listOf("//"),
                "/*",
                "*/",
            )

        "java" ->
            LanguagePatterns(
                Regex(
                    """(class|interface|enum|@interface|record|public|private|protected|static|abstract|final|synchronized|volatile|native|transient|strictfp).*"""
                ),
                listOf("//"),
                "/*",
                "*/",
            )

        "python" ->
            LanguagePatterns(Regex("""(def|class|async def|@|import|from).*"""), listOf("#"), "\"\"\"", "\"\"\"")

        "ruby" ->
            LanguagePatterns(
                Regex("""(def|class|module|attr_|require|include|extend).*"""),
                listOf("#"),
                "=begin",
                "=end",
            )

        "javascript",
        "typescript" ->
            LanguagePatterns(
                Regex("""(function|class|const|let|var|import|export|interface|type|enum|namespace).*"""),
                listOf("//"),
                "/*",
                "*/",
            )

        "go" ->
            LanguagePatterns(
                Regex("""(func|type|struct|interface|package|import|var|const).*"""),
                listOf("//"),
                "/*",
                "*/",
            )

        "rust" ->
            LanguagePatterns(
                Regex("""(fn|struct|enum|trait|impl|pub|use|mod|const|static|type|async|unsafe).*"""),
                listOf("//"),
                "/*",
                "*/",
            )

        "c",
        "cpp" ->
            LanguagePatterns(
                Regex("""(class|struct|enum|typedef|namespace|template|void|int|char|bool|auto|extern|static|virtual).*"""),
                listOf("//"),
                "/*",
                "*/",
            )

        // Default fallback for other languages
        else ->
            LanguagePatterns(
                Regex("""(class|interface|object|enum|fun|def|function|public|private|protected|static).*"""),
                listOf("//", "#"),
                "/*",
                "*/",
            )
    }
211+
57212 /* *
58213 * Finds the README file in the repository.
59214 *
0 commit comments