@@ -26,45 +26,47 @@ class FileSystemStats:
2626class FileSystemNode (Source ): # pylint: disable=too-many-instance-attributes
2727 """Base class for filesystem nodes (files, directories, symlinks)."""
2828
29- name : str = ""
30- path_str : str = ""
31- path : Path | None = None
29+ # Required fields - use None defaults and validate in __post_init__
30+ name : str | None = None
31+ path_str : str | None = None
32+ path : "Path | None" = None
33+
34+ # Optional fields with sensible defaults
3235 size : int = 0
3336 file_count : int = 0
3437 dir_count : int = 0
3538 depth : int = 0
3639 children : list [FileSystemNode ] = field (default_factory = list )
37-
38- @property
39- def tree (self ) -> str :
40- """Return the name of this node."""
41- return self .name
42-
43- def render_tree (self , prefix : str = "" , * , is_last : bool = True ) -> list [str ]:
44- """Return default tree representation with just the name."""
45- current_prefix = "└── " if is_last else "├── "
46- return [f"{ prefix } { current_prefix } { self .name } " ]
40+
41+ # Class attribute for display type name (instead of fragile string manipulation)
42+ _display_type : str = "NODE"
43+
44+ def __post_init__ (self ) -> None :
45+ """Validate required fields after initialization."""
46+ if self .name is None :
47+ raise ValueError ("FileSystemNode requires 'name' field" )
48+ if self .path_str is None :
49+ raise ValueError ("FileSystemNode requires 'path_str' field" )
50+ if self .path is None :
51+ raise ValueError ("FileSystemNode requires 'path' field" )
4752
4853 def sort_children (self ) -> None :
4954 """Sort the children nodes of a directory according to a specific order."""
5055
5156 def _sort_key (child : FileSystemNode ) -> tuple [int , str ]:
52- name = child .name .lower ()
53- # Each child knows its own sort priority - polymorphism!
54- priority = child .get_sort_priority ()
55- if priority == 0 and (name == "readme" or name .startswith ("readme." )):
56- return (0 , name )
57- if priority == 0 : # Files
57+ name = (child .name or "" ).lower ()
58+ # Files have priority 0, others have priority 1
59+ # Use string comparison to avoid circular import
60+ if child .__class__ .__name__ == "FileSystemFile" :
61+ priority = 0
62+ if name == "readme" or name .startswith ("readme." ):
63+ return (0 , name )
5864 return (1 if not name .startswith ("." ) else 2 , name )
5965 # Directories, symlinks, etc.
6066 return (3 if not name .startswith ("." ) else 4 , name )
6167
6268 self .children .sort (key = _sort_key )
6369
64- def get_sort_priority (self ) -> int :
65- """Return sort priority. Override in subclasses."""
66- return 1 # Default: not a file
67-
6870 @property
6971 def content_string (self ) -> str :
7072 """Return the content of the node as a string, including path and content.
@@ -75,46 +77,60 @@ def content_string(self) -> str:
7577 A string representation of the node's content.
7678
7779 """
78- type_name = self .__class__ .__name__ .upper ().replace ("FILESYSTEM" , "" )
80+ # Use class attribute instead of fragile string manipulation
81+ type_name = self ._display_type
7982
8083 parts = [
8184 SEPARATOR ,
82- f"{ type_name } : { str (self .path_str ).replace (os .sep , '/' )} " ,
85+ f"{ type_name } : { str (self .path_str or '' ).replace (os .sep , '/' )} " ,
8386 SEPARATOR ,
8487 f"{ self .content } " ,
8588 ]
8689
8790 return "\n " .join (parts ) + "\n \n "
8891
8992 def get_content (self ) -> str :
90- """Return file content. Override in subclasses for specific behavior."""
91- if self .path is None :
93+ """Return file content with proper encoding detection."""
94+ from gitingest .utils .file_utils import _decodes , _get_preferred_encodings , _read_chunk
95+ from gitingest .utils .notebook import process_notebook
96+
97+ if not self .path :
9298 return "Error: No path specified"
9399
94- try :
95- return self .path .read_text (encoding = "utf-8" )
96- except Exception as e :
97- return f"Error reading content of { self .name } : { e } "
100+ # Handle notebook files specially
101+ if self .path .suffix == ".ipynb" :
102+ try :
103+ return process_notebook (self .path )
104+ except Exception as exc :
105+ return f"Error processing notebook: { exc } "
106+
107+ # Read a chunk to check if it's binary or text
108+ chunk = _read_chunk (self .path )
98109
99- def get_summary_info (self ) -> str :
100- """Return summary information. Override in subclasses."""
101- return ""
110+ if chunk is None :
111+ return "Error reading file"
102112
103- def is_single_file (self ) -> bool :
104- """Return whether this node represents a single file."""
105- return False
113+ if chunk == b"" :
114+ return "[Empty file]"
106115
107- def gather_contents ( self ) -> str :
108- """Gather file contents. Override in subclasses."""
109- return self . content_string
116+ # Check if it's binary
117+ if not _decodes ( chunk , "utf-8" ):
118+ return "[Binary file]"
110119
111- def get_display_name (self ) -> str :
112- """Get display name for tree view. Override in subclasses."""
113- return self .name
120+ # Find the first encoding that decodes the sample
121+ good_enc : str | None = next (
122+ (enc for enc in _get_preferred_encodings () if _decodes (chunk , encoding = enc )),
123+ None ,
124+ )
114125
115- def has_children (self ) -> bool :
116- """Return whether this node has children to display."""
117- return False
126+ if good_enc is None :
127+ return "Error: Unable to decode file with available encodings"
128+
129+ try :
130+ with self .path .open (encoding = good_enc ) as fp :
131+ return fp .read ()
132+ except (OSError , UnicodeDecodeError ) as exc :
133+ return f"Error reading file with { good_enc !r} : { exc } "
118134
119135 @property
120136 def content (self ) -> str :
@@ -125,109 +141,38 @@ def content(self) -> str:
125141@dataclass
126142class FileSystemFile (FileSystemNode ):
127143 """Represents a file in the filesystem."""
128-
129- def get_sort_priority (self ) -> int :
130- """Files have priority 0 for sorting."""
131- return 0
132-
133- def get_summary_info (self ) -> str :
134- """Return file summary information."""
135- return f"File: { self .name } \n Lines: { len (self .content .splitlines ()):,} \n "
136-
137- def is_single_file (self ) -> bool :
138- """Files are single files."""
139- return True
140-
141- def render_tree (self , prefix : str = "" , * , is_last : bool = True ) -> list [str ]:
142- """Render the tree representation of this file."""
143- current_prefix = "└── " if is_last else "├── "
144- return [f"{ prefix } { current_prefix } { self .name } " ]
144+
145+ _display_type : str = "FILE"
145146
146147
147148@dataclass
148149class FileSystemDirectory (FileSystemNode ):
149150 """Represents a directory in the filesystem."""
150151
151152 file_count_total : int = 0
153+ _display_type : str = "DIRECTORY"
152154
153155 def get_content (self ) -> str :
154156 """Directories cannot have content."""
155157 msg = "Cannot read content of a directory node"
156158 raise ValueError (msg )
157159
158- def get_summary_info (self ) -> str :
159- """Return directory summary information."""
160- return f"Files analyzed: { self .file_count } \n "
161-
162- def gather_contents (self ) -> str :
163- """Recursively gather contents of all files under this directory."""
164- return "\n " .join (child .gather_contents () for child in self .children )
165-
166- def get_display_name (self ) -> str :
167- """Directories get a trailing slash."""
168- return self .name + "/"
169-
170- def has_children (self ) -> bool :
171- """Directories have children if the list is not empty."""
172- return bool (self .children )
173-
174- def render_tree (self , prefix : str = "" , * , is_last : bool = True ) -> list [str ]:
175- """Render the tree representation of this directory."""
176- lines = []
177- current_prefix = "└── " if is_last else "├── "
178- display_name = self .name + "/"
179- lines .append (f"{ prefix } { current_prefix } { display_name } " )
180- if hasattr (self , "children" ) and self .children :
181- new_prefix = prefix + (" " if is_last else "│ " )
182- for i , child in enumerate (self .children ):
183- is_last_child = i == len (self .children ) - 1
184- lines .extend (child .render_tree (prefix = new_prefix , is_last = is_last_child ))
185- return lines
186-
187- @property
188- def tree (self ) -> str :
189- """Return the tree representation of this directory."""
190- return "\n " .join (self .render_tree ())
191-
192160
193161@dataclass
194162class GitRepository (FileSystemDirectory ):
195163 """A directory that contains a .git folder, representing a Git repository."""
196164
197165 git_info : dict = field (default_factory = dict ) # Store git metadata like branch, commit, etc.
198-
199- def render_tree (self , prefix : str = "" , * , is_last : bool = True ) -> list [str ]:
200- """Render the tree representation of this git repository."""
201- lines = []
202- current_prefix = "└── " if is_last else "├── "
203- # Mark as git repo in the tree
204- display_name = f"{ self .name } / (git repository)"
205- lines .append (f"{ prefix } { current_prefix } { display_name } " )
206- if hasattr (self , "children" ) and self .children :
207- new_prefix = prefix + (" " if is_last else "│ " )
208- for i , child in enumerate (self .children ):
209- is_last_child = i == len (self .children ) - 1
210- lines .extend (child .render_tree (prefix = new_prefix , is_last = is_last_child ))
211- return lines
166+ _display_type : str = "GIT_REPOSITORY"
212167
213168
214169@dataclass
215170class FileSystemSymlink (FileSystemNode ):
216171 """Represents a symbolic link in the filesystem."""
217172
218173 target : str = ""
219- # Add symlink-specific fields if needed
174+ _display_type : str = "SYMLINK"
220175
221176 def get_content (self ) -> str :
222177 """Symlinks content is what they point to."""
223178 return self .target
224-
225- def get_display_name (self ) -> str :
226- """Symlinks show target."""
227- return f"{ self .name } -> { self .target } "
228-
229- def render_tree (self , prefix : str = "" , * , is_last : bool = True ) -> list [str ]:
230- """Render the tree representation of this symlink."""
231- current_prefix = "└── " if is_last else "├── "
232- display_name = f"{ self .name } -> { self .target } " if self .target else self .name
233- return [f"{ prefix } { current_prefix } { display_name } " ]
0 commit comments