Skip to content

Commit 85ffb34

Browse files
feat: implement streaming file I/O and refine async infrastructure
- Implement high-performance streaming file I/O in Rust (classic-file-io-core) with Python bindings.
- Add Python wrappers and sync adapters for streaming I/O in ClassicLib.FileIO.
- Refactor AsyncBridge and AsyncUtilities for improved stability and performance.
- Update PapyrusLog monitoring to utilize new async patterns.
- Add comprehensive tests for streaming I/O and updated async logic.
- Add codebase inefficiency report to documentation.
1 parent d733632 commit 85ffb34

34 files changed

Lines changed: 747 additions & 206 deletions

ClassicLib/AsyncBridge.py

Lines changed: 16 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -729,62 +729,25 @@ def smart_await[T](coro: Coroutine[Any, Any, T]) -> T:
729729
)
730730

731731

732-
def create_sync_wrapper[T](async_func: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
732+
def create_sync_wrapper[T](async_func: Callable[..., Coroutine[Any, Any, T]], strict: bool = False) -> Callable[..., T]:
733733
"""
734734
Create a sync wrapper for an async function with context-aware execution.
735735
736736
This wrapper automatically chooses the appropriate async execution method:
737737
- GUI mode: Uses AsyncBridge (Qt event loop integration)
738738
- CLI/TUI mode: Uses asyncio.run() (creates new event loop per call)
739739
740-
IMPORTANT - Appropriate Usage:
741-
✅ GUI workers (Qt threads, PySide6 slots)
742-
✅ Testing and benchmarking isolated async functions
743-
✅ One-off operations in sync contexts (initialization, cleanup)
744-
745-
❌ DO NOT USE in production CLI main flow
746-
❌ DO NOT USE when already in async context
747-
❌ DO NOT USE for repeated operations in CLI (inefficient)
748-
749-
Best Practices:
750-
- Production CLI code should be async-first (use asyncio.run() once at entry point)
751-
- See CLASSIC_ScanLogs.py for reference async-first CLI pattern
752-
- In CLI, call async methods directly with await instead of using wrappers
753-
- Sync wrappers are primarily for GUI thread safety and testing purposes
754-
755-
Usage:
756-
# Example 1: GUI worker (CORRECT)
757-
class CrashLogsScanWorker(QThread):
758-
def _perform_scan(self):
759-
sync_scan = create_sync_wrapper(async_scan_function)
760-
result = sync_scan() # Uses AsyncBridge in GUI mode
761-
762-
# Example 2: Testing (CORRECT)
763-
def test_async_function():
764-
sync_wrapper = create_sync_wrapper(async_function)
765-
result = sync_wrapper() # Uses asyncio.run() in CLI mode
766-
767-
# Example 3: CLI production (INCORRECT - don't do this)
768-
def main():
769-
sync_wrapper = create_sync_wrapper(async_function)
770-
result = sync_wrapper() # Creates new event loop per call!
771-
772-
# Example 4: CLI production (CORRECT - do this instead)
773-
async def main():
774-
result = await async_function() # Direct async, one event loop
775-
776-
if __name__ == "__main__":
777-
asyncio.run(main()) # Single event loop at entry point
778-
779740
Args:
780741
async_func: The async function to wrap
742+
strict: If True, raises RuntimeError in CLI/TUI mode instead of falling back
743+
to asyncio.run(). Use this for functions that must only be called
744+
in GUI contexts to prevent performance "footguns".
781745
782746
Returns:
783-
A sync wrapper that works in both GUI and CLI modes
747+
A sync wrapper that works in both GUI and CLI modes (unless strict=True)
784748
785-
Note:
786-
The CLI/TUI mode asyncio.run() fallback is intentional for testing
787-
and benchmarking. Production CLI code should not rely on this pattern.
749+
Raises:
750+
RuntimeError: If strict=True and called in CLI/TUI mode.
788751
"""
789752
import asyncio
790753

@@ -796,6 +759,15 @@ def wrapper(*args: Any, **kwargs: Any) -> T:
796759
# GUI mode: Use AsyncBridge for Qt event loop integration
797760
bridge = AsyncBridge.get_instance()
798761
return bridge.run_async(coro)
762+
763+
# Strict mode check - prevent inefficient usage in CLI
764+
if strict:
765+
raise RuntimeError(
766+
f"Strict mode: Cannot use sync wrapper for '{async_func.__name__}' in CLI/TUI mode.\n"
767+
"This function creates a new event loop for every call, which is inefficient.\n"
768+
"Use 'await' and call the async function directly instead."
769+
)
770+
799771
# CLI/TUI mode: Use standard asyncio.run()
800772
return asyncio.run(coro)
801773

ClassicLib/FileIO/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
read_crash_log_sync,
1717
read_file_sync,
1818
read_lines_sync,
19+
stream_lines_sync,
1920
write_bytes_sync,
2021
write_crash_report_sync,
2122
write_file_sync,
@@ -31,6 +32,7 @@
3132
# Sync adapters
3233
"read_file_sync",
3334
"read_lines_sync",
35+
"stream_lines_sync",
3436
"read_bytes_sync",
3537
"write_file_sync",
3638
"write_lines_sync",

ClassicLib/FileIO/core.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
aiofiles = None # type: ignore[assignment]
2828
AIOFILES_AVAILABLE = False
2929

30+
from collections.abc import AsyncIterator, Iterator
3031
from itertools import starmap
3132

3233
from ClassicLib.FileIO.path_utils import ensure_path
@@ -35,6 +36,7 @@
3536
# Import async utilities if available
3637
try:
3738
from ClassicLib.FileIO.Async import (
39+
open_file_with_encoding_async,
3840
read_file_with_encoding_async,
3941
)
4042

@@ -152,6 +154,59 @@ async def read_lines(self, path: Path | str) -> list[str]:
152154
content = await loop.run_in_executor(None, path.read_text, self.default_encoding, self.default_errors)
153155
return content.splitlines()
154156

157+
async def stream_lines(self, path: Path | str) -> AsyncIterator[str]:
158+
"""
159+
Asynchronously streams the contents of a file line by line.
160+
161+
This method yields lines from the file one by one, which is memory-efficient
162+
for large files. It utilizes automatic encoding detection if available.
163+
Lines are stripped of trailing newline characters for consistency with read_lines().
164+
165+
Args:
166+
path (Path | str): The path to the file to be read.
167+
168+
Yields:
169+
str: A single line from the file.
170+
"""
171+
path = FileIOCore._ensure_path(path)
172+
173+
# Use encoding detection if available
174+
if ASYNC_ENCODING_AVAILABLE:
175+
async with open_file_with_encoding_async(path) as f:
176+
async for line in f:
177+
yield line.rstrip("\n")
178+
elif AIOFILES_AVAILABLE:
179+
assert aiofiles is not None
180+
async with aiofiles.open(path, encoding=self.default_encoding, errors=self.default_errors) as f:
181+
async for line in f:
182+
yield line.rstrip("\n")
183+
else:
184+
# Fallback: Read all lines and yield them (loses streaming benefit but maintains API)
185+
lines = await self.read_lines(path)
186+
for line in lines:
187+
yield line
188+
189+
def stream_lines_sync(self, path: Path | str) -> Iterator[str]:
190+
"""
191+
Synchronously streams the contents of a file line by line.
192+
193+
This method yields lines from the file one by one, using automatic encoding
194+
detection. It is memory-efficient for large files.
195+
Lines are stripped of trailing newline characters.
196+
197+
Args:
198+
path (Path | str): The path to the file to be read.
199+
200+
Yields:
201+
str: A single line from the file.
202+
"""
203+
from ClassicLib.Utils.file_utils import open_file_with_encoding
204+
205+
path = FileIOCore._ensure_path(path)
206+
with open_file_with_encoding(path) as f:
207+
for line in f:
208+
yield line.rstrip("\n")
209+
155210
@staticmethod
156211
async def read_bytes(path: Path | str) -> bytes:
157212
"""

ClassicLib/FileIO/sync_adapters.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,13 @@ async def main():
5353
with FileIOCore for optimal performance.
5454
"""
5555

56+
from collections.abc import Iterator
5657
from pathlib import Path
5758
from typing import Any
5859

5960
from ClassicLib.AsyncBridge import create_sync_wrapper
6061
from ClassicLib.integration.factory import get_file_io
62+
from ClassicLib.Utils.file_utils import open_file_with_encoding
6163

6264

6365
# Helper to get core lazily
@@ -112,3 +114,30 @@ async def _append_file(path: Path | str, content: str) -> None:
112114
read_crash_log_sync = create_sync_wrapper(_read_crash_log)
113115
write_crash_report_sync = create_sync_wrapper(_write_crash_report)
114116
append_file_sync = create_sync_wrapper(_append_file)
117+
118+
119+
def stream_lines_sync(path: Path | str) -> Iterator[str]:
120+
"""
121+
Synchronously streams the contents of a file line by line.
122+
123+
This function yields lines from the file one by one, using automatic encoding
124+
detection. It is memory-efficient for large files and does NOT use the
125+
AsyncBridge or create a new event loop, making it safe for simple sync loops.
126+
127+
It attempts to use the Rust-accelerated implementation if available via
128+
get_file_io(), otherwise falls back to pure Python.
129+
130+
Args:
131+
path (Path | str): The path to the file to be read.
132+
133+
Yields:
134+
str: A single line from the file.
135+
"""
136+
# Try to use FileIOCore (which might be Rust-accelerated)
137+
io_core = _core()
138+
if hasattr(io_core, "stream_lines_sync"):
139+
yield from io_core.stream_lines_sync(path)
140+
else:
141+
# Fallback for standard Python FileIOCore or generic IO
142+
with open_file_with_encoding(path) as f:
143+
yield from f

ClassicLib/PapyrusLog.py

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313

1414
from ClassicLib import GlobalRegistry
1515
from ClassicLib.Constants import YAML
16-
from ClassicLib.FileIO import read_lines_sync
17-
from ClassicLib.integration.status import is_rust_accelerated
16+
from ClassicLib.FileIO import stream_lines_sync
1817
from ClassicLib.Logger import logger
1918
from ClassicLib.YamlSettingsCache import yaml_settings
2019

@@ -23,16 +22,16 @@ def papyrus_logging() -> tuple[str, int]:
2322
"""
2423
Analyzes Papyrus log files, extracting various statistics and compiling a summary.
2524
26-
This function reads a Papyrus log file using Rust-accelerated file I/O if available,
25+
This function reads a Papyrus log file using streaming file I/O to minimize memory usage,
2726
and computes key data such as the total number of dumps, stacks, warnings, and errors
2827
present in the log. It also calculates the ratio of dumps to stacks. If the log file
2928
is not found, the function provides user guidance on enabling and locating Papyrus
3029
logging.
3130
32-
Rust Acceleration:
33-
- Uses read_lines_sync for 10x faster file I/O
34-
- Automatic encoding detection (no chardet needed)
35-
- Better error handling for malformed files
31+
Optimization:
32+
- Uses stream_lines_sync for memory-efficient line-by-line processing
33+
- Automatic encoding detection
34+
- Handles multi-gigabyte logs without OOM errors
3635
3736
Returns:
3837
tuple[str, int]: A tuple containing a formatted string with log analysis
@@ -44,20 +43,15 @@ def papyrus_logging() -> tuple[str, int]:
4443
message_list: list[str] = []
4544
papyrus_path: Path | None = yaml_settings(Path, YAML.Game_Local, f"Game{GlobalRegistry.get_vr()}_Info.Docs_File_PapyrusLog")
4645

47-
# Log Rust acceleration status (only once at module level)
48-
if not hasattr(papyrus_logging, "_logged_rust_status"):
49-
if is_rust_accelerated("file_io"):
50-
logger.debug("Papyrus log reading using Rust-accelerated file I/O (10x faster)")
51-
else:
52-
logger.debug("Papyrus log reading using Python file I/O implementation")
53-
papyrus_logging._logged_rust_status = True # type: ignore
46+
# Log optimization status (only once, on the first call)
47+
if not hasattr(papyrus_logging, "_logged_status"):
48+
logger.debug("Papyrus log reading using Streaming I/O (Memory Efficient)")
49+
papyrus_logging._logged_status = True # type: ignore
5450

5551
count_dumps = count_stacks = count_warnings = count_errors = 0
5652
if papyrus_path and papyrus_path.exists():
57-
# Use Rust-accelerated read_lines_sync (automatic encoding detection, 10x faster)
58-
papyrus_data: list[str] = read_lines_sync(papyrus_path)
59-
60-
for line in papyrus_data:
53+
# Use streaming I/O (automatic encoding detection, memory efficient)
54+
for line in stream_lines_sync(papyrus_path):
6155
if "Dumping Stacks" in line:
6256
count_dumps += 1
6357
elif "Dumping Stack" in line:

ClassicLib/ScanLog/FormIDAnalyzer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def formid_match(self, formids_matches: list[str], crashlog_plugins: dict[str, s
128128
RuntimeError: If called in CLI/TUI mode (use FormIDAnalyzerCore instead)
129129
"""
130130
# Use Phase 2 wrapper - errors in CLI/TUI, works in GUI
131-
wrapper = create_sync_wrapper(self._core.formid_match)
131+
wrapper = create_sync_wrapper(self._core.formid_match, strict=True)
132132
return wrapper(formids_matches, crashlog_plugins)
133133

134134
def lookup_formid_value(self, formid: str, plugin: str) -> str | None:
@@ -153,5 +153,5 @@ def lookup_formid_value(self, formid: str, plugin: str) -> str | None:
153153
RuntimeError: If called in CLI/TUI mode (use FormIDAnalyzerCore instead)
154154
"""
155155
# Use Phase 2 wrapper - errors in CLI/TUI, works in GUI
156-
wrapper = create_sync_wrapper(self._core.lookup_formid_value)
156+
wrapper = create_sync_wrapper(self._core.lookup_formid_value, strict=True)
157157
return wrapper(formid, plugin)

ClassicLib/ScanLog/ScanLogsExecutor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ def scan_sync(self) -> ScanResult:
407407
RuntimeError: If called in CLI/TUI mode (use async methods)
408408
"""
409409
# Create wrapper per call for proper instance method binding
410-
wrapper = create_sync_wrapper(self.execute_scan)
410+
wrapper = create_sync_wrapper(self.execute_scan, strict=True)
411411
return wrapper()
412412

413413

0 commit comments

Comments
 (0)