diff --git a/crates/monty-js/src/convert.rs b/crates/monty-js/src/convert.rs index 8c8196b25..622106362 100644 --- a/crates/monty-js/src/convert.rs +++ b/crates/monty-js/src/convert.rs @@ -103,6 +103,10 @@ pub fn monty_to_js<'e>(obj: &MontyObject, env: &'e Env) -> Result env.create_string(name)?.into_unknown(env)?, + MontyObject::FileData { path, mode, .. } => { + let repr = format!("<_io.TextIOWrapper name='{path}' mode='{mode}'>"); + env.create_string(&repr)?.into_unknown(env)? + } }; Ok(JsMontyObject(unknown)) } diff --git a/crates/monty-python/src/convert.rs b/crates/monty-python/src/convert.rs index 6ba53f613..1dcb6162c 100644 --- a/crates/monty-python/src/convert.rs +++ b/crates/monty-python/src/convert.rs @@ -259,6 +259,13 @@ pub fn monty_to_py(py: Python<'_>, obj: &MontyObject, dc_registry: &DcRegistry) // Function objects are internal to the name lookup protocol and should not normally // appear as final output values. If they do, represent as a string with the function name. MontyObject::Function { name, .. } => Ok(PyString::new(py, name).into_any().unbind()), + // FileData is input-only (from host to VM) and should not appear as output. + MontyObject::FileData { path, mode, .. 
} => Ok(PyString::new( + py, + &format!("<_io.TextIOWrapper name='{path}' mode='{mode}'>"), + ) + .into_any() + .unbind()), } } diff --git a/crates/monty/src/builtins/mod.rs b/crates/monty/src/builtins/mod.rs index 0b7c4ba86..ab7b52817 100644 --- a/crates/monty/src/builtins/mod.rs +++ b/crates/monty/src/builtins/mod.rs @@ -21,6 +21,7 @@ mod map; mod min_max; // min and max share implementation mod next; mod oct; +mod open; mod ord; mod pow; mod print; @@ -38,7 +39,7 @@ use strum::{Display, EnumString, FromRepr, IntoStaticStr}; use crate::{ args::ArgValues, - bytecode::VM, + bytecode::{CallResult, VM}, exception_private::{ExcType, RunResult}, resource::ResourceTracker, types::Type, @@ -61,11 +62,14 @@ pub(crate) enum Builtins { impl Builtins { /// Calls this builtin with the given arguments. - pub fn call(self, vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { + /// + /// Returns `CallResult` to support builtins like `open()` that need to + /// yield `OsCall` to the host rather than returning a value directly. + pub fn call(self, vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { match self { Self::Function(b) => b.call(vm, args), - Self::ExcType(exc) => exc.call(vm, args), - Self::Type(t) => t.call(vm, args), + Self::ExcType(exc) => exc.call(vm, args).map(CallResult::Value), + Self::Type(t) => t.call(vm, args).map(CallResult::Value), } } @@ -181,7 +185,7 @@ pub enum BuiltinsFunctions { Next, // object - handled by Type enum Oct, - // Open, + Open, Ord, Pow, Print, @@ -210,7 +214,19 @@ impl BuiltinsFunctions { /// /// All builtins receive the full VM context, which provides access to the heap, /// interned strings, and print output. - pub(crate) fn call(self, vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { + /// + /// Returns `CallResult` to support builtins like `open()` that need to yield + /// `OsCall` to the host rather than returning a value directly. 
+ pub(crate) fn call(self, vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { + match self { + Self::Open => open::builtin_open(vm, args), + // All other builtins return a Value directly + other => other.call_value(vm, args).map(CallResult::Value), + } + } + + /// Executes builtins that always return a `Value` directly. + fn call_value(self, vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { match self { Self::Abs => abs::builtin_abs(vm, args), Self::All => all::builtin_all(vm, args), @@ -241,6 +257,7 @@ impl BuiltinsFunctions { Self::Sum => sum::builtin_sum(vm, args), Self::Type => type_::builtin_type(vm, args), Self::Zip => zip::builtin_zip(vm, args), + Self::Open => unreachable!("Open is handled in call()"), } } } diff --git a/crates/monty/src/builtins/open.rs b/crates/monty/src/builtins/open.rs new file mode 100644 index 000000000..8a3ce0741 --- /dev/null +++ b/crates/monty/src/builtins/open.rs @@ -0,0 +1,76 @@ +//! Implementation of the `open()` builtin function. +//! +//! `open()` yields an `OsCall(FileOpen)` to the host, which reads or prepares the file +//! and returns `MontyObject::FileData`. The VM then creates a `FileObject` on the heap. + +use crate::{ + ResourceTracker, + args::ArgValues, + bytecode::{CallResult, VM}, + exception_private::{ExcType, RunResult, SimpleException}, + heap::HeapData, + os::OsFunction, + types::str::allocate_string, + value::Value, +}; + +/// Implementation of the `open()` builtin function. +/// +/// Accepts `open(path)` or `open(path, mode)`. The mode defaults to `'r'` (read). +/// Yields `OsCall(FileOpen)` with path and mode arguments for the host to resolve. 
+pub fn builtin_open(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) -> RunResult { + let (path_value, mode_value) = args.get_one_two_args("open", vm.heap)?; + + // Extract strings to owned copies before dropping values + let path_str = extract_str(&path_value, vm, "open", "file")?.to_owned(); + let mode_str = if let Some(ref mode_val) = mode_value { + extract_str(mode_val, vm, "open", "mode")?.to_owned() + } else { + "r".to_owned() + }; + + // Clean up the original arguments + path_value.drop_with_heap(vm); + if let Some(m) = mode_value { + m.drop_with_heap(vm); + } + + // Validate mode + if mode_str != "r" && mode_str != "w" { + return Err(SimpleException::new_msg(ExcType::ValueError, format!("invalid mode: '{mode_str}'")).into()); + } + + // Build the OsCall arguments: path string and mode string as Values + let os_path = allocate_string(path_str, vm.heap)?; + let os_mode = allocate_string(mode_str, vm.heap)?; + + Ok(CallResult::OsCall( + OsFunction::FileOpen, + ArgValues::Two(os_path, os_mode), + )) +} + +/// Extracts a `&str` from a `Value`, raising `TypeError` if it's not a string. 
+fn extract_str<'a>( + value: &'a Value, + vm: &'a VM<'_, '_, impl ResourceTracker>, + func_name: &str, + arg_name: &str, +) -> RunResult<&'a str> { + match value { + Value::InternString(id) => Ok(vm.interns.get_str(*id)), + Value::Ref(id) => match vm.heap.get(*id) { + HeapData::Str(s) => Ok(s.as_str()), + _ => Err(SimpleException::new_msg( + ExcType::TypeError, + format!("{func_name}() {arg_name} argument must be str"), + ) + .into()), + }, + _ => Err(SimpleException::new_msg( + ExcType::TypeError, + format!("{func_name}() {arg_name} argument must be str"), + ) + .into()), + } +} diff --git a/crates/monty/src/bytecode/compiler.rs b/crates/monty/src/bytecode/compiler.rs index 7bea26ec7..bd54cbc07 100644 --- a/crates/monty/src/bytecode/compiler.rs +++ b/crates/monty/src/bytecode/compiler.rs @@ -26,8 +26,9 @@ use crate::{ }, fstring::{ConversionFlag, FStringPart, FormatSpec, ParsedFormatSpec, encode_format_spec}, function::Function, - intern::{Interns, StringId}, + intern::{Interns, StaticStrings, StringId}, modules::StandardLib, + namespace::NamespaceId, parse::{CodeRange, ExceptHandler, Try}, value::{EitherStr, Value}, }; @@ -396,6 +397,12 @@ impl<'a> Compiler<'a> { } Node::FunctionDef(func_def) => self.compile_function_def(func_def)?, Node::Try(try_block) => self.compile_try(try_block)?, + Node::With { + context_expr, + optional_var, + manager_slot, + body, + } => self.compile_with(context_expr, optional_var.as_ref(), *manager_slot, body)?, Node::Import { names } => { for import_name in names { self.compile_import(import_name.module_name, &import_name.binding); @@ -2739,6 +2746,212 @@ impl<'a> Compiler<'a> { /// same approach CPython uses - each path has different stack state at entry /// (e.g., return has a value on stack, break has popped the iterator), so we /// can't easily share a single copy. The duplication is intentional. + /// Compiles a `with` statement using the context manager protocol. 
+ /// + /// The bytecode structure is: + /// ```text + /// [evaluate EXPR] # push context manager + /// DUP # stack: [mgr, mgr] + /// STORE mgr_slot # save manager for __exit__; stack: [mgr] + /// CALL_ATTR __enter__ 0 # stack: [enter_result] + /// STORE var_slot / POP # store or discard enter result + /// + /// body_start: + /// [body] + /// body_end: + /// + /// # === Normal exit path === + /// LOAD mgr_slot # load manager + /// LOAD_NONE × 3 # None, None, None + /// CALL_ATTR __exit__ 3 # call __exit__(None, None, None) + /// POP # discard result + /// JUMP end + /// + /// # === Exception handler === + /// handler: + /// POP # pop exception from operand stack + /// LOAD mgr_slot # load manager + /// LOAD_NONE × 3 # TODO: pass actual exc_type, exc_val, exc_tb + /// CALL_ATTR __exit__ 3 + /// JUMP_IF_TRUE suppress + /// RERAISE + /// suppress: + /// CLEAR_EXCEPTION + /// end: + /// ``` + fn compile_with( + &mut self, + context_expr: &ExprLoc, + optional_var: Option<&Identifier>, + manager_slot: Option, + body: &[PreparedNode], + ) -> Result<(), CompileError> { + let mgr_slot = u16::try_from(manager_slot.expect("manager_slot must be set during prepare").index()) + .expect("manager slot exceeds u16"); + + let enter_id: StringId = StaticStrings::DunderEnter.into(); + let exit_id: StringId = StaticStrings::DunderExit.into(); + let enter_idx = u16::try_from(enter_id.index()).expect("enter name index exceeds u16"); + let exit_idx = u16::try_from(exit_id.index()).expect("exit name index exceeds u16"); + + // === Setup === + // Evaluate context expression → push manager + self.compile_expr(context_expr)?; + // Duplicate manager (one for __enter__, one saved for __exit__) + self.code.emit(Opcode::Dup); + // Store manager copy in hidden slot + if self.is_module_scope { + self.code.emit_u16(Opcode::StoreGlobal, mgr_slot); + } else { + self.code.emit_store_local(mgr_slot); + } + // Call __enter__() on the manager + self.code.emit_u16_u8(Opcode::CallAttr, enter_idx, 0); + // 
Store enter result in variable or discard + if let Some(var) = optional_var { + self.compile_store(var); + } else { + self.code.emit(Opcode::Pop); + } + + // Record stack depth before body (for exception table unwinding) + let stack_depth = self.code.stack_depth(); + + // Push a FinallyTarget so return/break/continue call __exit__ first + self.finally_targets.push(FinallyTarget { + return_jumps: Vec::new(), + break_jumps: Vec::new(), + continue_jumps: Vec::new(), + loop_depth_at_entry: self.loop_stack.len(), + }); + + // === Body (protected by exception table) === + let body_start = self.code.current_offset(); + self.compile_block(body)?; + let body_end = self.code.current_offset(); + + // Jump to normal exit path (skip exception handler) + let normal_exit_jump = self.code.emit_jump(Opcode::Jump); + + // === Exception handler === + let handler_start = self.code.current_offset(); + // VM pushes exception onto stack + self.code.adjust_stack_depth(1); + // Pop exception from operand stack (it's on exception_stack too) + self.code.emit(Opcode::Pop); + // Load manager and call __exit__(None, None, None) + self.emit_load_manager(mgr_slot); + self.code.emit(Opcode::LoadNone); + self.code.emit(Opcode::LoadNone); + self.code.emit(Opcode::LoadNone); + self.code.emit_u16_u8(Opcode::CallAttr, exit_idx, 3); + // If __exit__ returns truthy, suppress the exception + let suppress_jump = self.code.emit_jump(Opcode::JumpIfTrue); + self.code.emit(Opcode::Reraise); + + // suppress: clear exception and continue + self.code.patch_jump(suppress_jump); + self.code.emit(Opcode::ClearException); + let after_suppress_jump = self.code.emit_jump(Opcode::Jump); + + // === Return/break/continue paths through __exit__ === + let finally_target = self.finally_targets.pop().expect("finally_targets should not be empty"); + + // Return path: return value is on stack + let return_path_start = if finally_target.return_jumps.is_empty() { + None + } else { + let start = self.code.current_offset(); + for 
jump in &finally_target.return_jumps { + self.code.patch_jump(*jump); + } + // Return value is on stack, stack = stack_depth + 1 + self.code.set_stack_depth(stack_depth + 1); + // Load manager and call __exit__(None, None, None) + self.emit_load_manager(mgr_slot); + self.code.emit(Opcode::LoadNone); + self.code.emit(Opcode::LoadNone); + self.code.emit(Opcode::LoadNone); + self.code.emit_u16_u8(Opcode::CallAttr, exit_idx, 3); + self.code.emit(Opcode::Pop); // discard __exit__ result + self.compile_return(); + Some(start) + }; + + // Break path + if !finally_target.break_jumps.is_empty() { + for break_info in &finally_target.break_jumps { + self.code.patch_jump(break_info.jump); + } + self.code.set_stack_depth(stack_depth.saturating_sub(1)); + self.emit_load_manager(mgr_slot); + self.code.emit(Opcode::LoadNone); + self.code.emit(Opcode::LoadNone); + self.code.emit(Opcode::LoadNone); + self.code.emit_u16_u8(Opcode::CallAttr, exit_idx, 3); + self.code.emit(Opcode::Pop); + self.compile_control_flow_after_finally(&finally_target.break_jumps, true); + } + + // Continue path + if !finally_target.continue_jumps.is_empty() { + for continue_info in &finally_target.continue_jumps { + self.code.patch_jump(continue_info.jump); + } + self.code.set_stack_depth(stack_depth); + self.emit_load_manager(mgr_slot); + self.code.emit(Opcode::LoadNone); + self.code.emit(Opcode::LoadNone); + self.code.emit(Opcode::LoadNone); + self.code.emit_u16_u8(Opcode::CallAttr, exit_idx, 3); + self.code.emit(Opcode::Pop); + self.compile_control_flow_after_finally(&finally_target.continue_jumps, false); + } + + // === Normal exit path === + self.code.patch_jump(normal_exit_jump); + self.code.patch_jump(after_suppress_jump); + self.code.set_stack_depth(stack_depth); + // Load manager and call __exit__(None, None, None) for normal exit + self.emit_load_manager(mgr_slot); + self.code.emit(Opcode::LoadNone); + self.code.emit(Opcode::LoadNone); + self.code.emit(Opcode::LoadNone); + 
self.code.emit_u16_u8(Opcode::CallAttr, exit_idx, 3); + self.code.emit(Opcode::Pop); // discard __exit__ result + + // === Exception table entries === + // Body → exception handler + self.code.add_exception_entry(ExceptionEntry::new( + u32::try_from(body_start).expect("bytecode offset exceeds u32"), + u32::try_from(body_end).expect("bytecode offset exceeds u32") + 3, // +3 to include JUMP + u32::try_from(handler_start).expect("bytecode offset exceeds u32"), + stack_depth, + )); + + // If there's a return path, protect it with an exception entry too + if let Some(return_start) = return_path_start { + let normal_exit = self.code.current_offset(); + self.code.add_exception_entry(ExceptionEntry::new( + u32::try_from(return_start).expect("bytecode offset exceeds u32"), + u32::try_from(normal_exit).expect("bytecode offset exceeds u32"), + u32::try_from(handler_start).expect("bytecode offset exceeds u32"), + stack_depth, + )); + } + + Ok(()) + } + + /// Emits a load instruction for the hidden context manager slot. + fn emit_load_manager(&mut self, mgr_slot: u16) { + if self.is_module_scope { + self.code.emit_u16(Opcode::LoadGlobal, mgr_slot); + } else { + self.code.emit_load_local(mgr_slot); + } + } + fn compile_try(&mut self, try_block: &Try) -> Result<(), CompileError> { let has_finally = !try_block.finally.is_empty(); let has_handlers = !try_block.handlers.is_empty(); diff --git a/crates/monty/src/bytecode/vm/call.rs b/crates/monty/src/bytecode/vm/call.rs index 63d7ab471..fcaa94d96 100644 --- a/crates/monty/src/bytecode/vm/call.rs +++ b/crates/monty/src/bytecode/vm/call.rs @@ -82,7 +82,13 @@ impl VM<'_, '_, T> { /// /// Calls a builtin function directly without stack manipulation for the callable. /// This is an optimization that avoids constant pool lookup and stack manipulation. - pub(super) fn exec_call_builtin_function(&mut self, builtin_id: u8, arg_count: usize) -> Result { + /// + /// Returns `CallResult` to support builtins like `open()` that yield `OsCall`. 
+ pub(super) fn exec_call_builtin_function( + &mut self, + builtin_id: u8, + arg_count: usize, + ) -> Result { // Convert u8 to BuiltinsFunctions via FromRepr if let Some(builtin) = BuiltinsFunctions::from_repr(builtin_id) { let args = self.pop_n_args(arg_count); @@ -356,10 +362,7 @@ impl VM<'_, '_, T> { /// - `Value::Ref`: checks for closure/function on heap pub(crate) fn call_function(&mut self, callable: &Value, args: ArgValues) -> Result { match callable { - Value::Builtin(builtin) => { - let result = builtin.call(self, args)?; - Ok(CallResult::Value(result)) - } + Value::Builtin(builtin) => builtin.call(self, args), Value::ModuleFunction(mf) => mf.call(self, args), Value::ExtFunction(name_id) => { // External function - return to caller to execute diff --git a/crates/monty/src/bytecode/vm/mod.rs b/crates/monty/src/bytecode/vm/mod.rs index 333c49f52..25f5f6dbc 100644 --- a/crates/monty/src/bytecode/vm/mod.rs +++ b/crates/monty/src/bytecode/vm/mod.rs @@ -1260,13 +1260,14 @@ impl<'h, 'a, T: ResourceTracker> VM<'h, 'a, T> { let arg_count = fetch_u8!(cached_frame) as usize; // Sync IP before call (builtins like map() may call evaluate_function - // which pushes frames and runs a nested run() loop) + // which pushes frames and runs a nested run() loop, or open() yields OsCall) self.current_frame_mut().ip = cached_frame.ip; - match self.exec_call_builtin_function(builtin_id, arg_count) { - Ok(result) => self.push(result), - Err(err) => catch_sync!(self, cached_frame, err), - } + handle_call_result!( + self, + cached_frame, + self.exec_call_builtin_function(builtin_id, arg_count) + ); } Opcode::CallBuiltinType => { // Fetch operands: type_id (u8) + arg_count (u8) diff --git a/crates/monty/src/expressions.rs b/crates/monty/src/expressions.rs index ef3199f62..129a4fa88 100644 --- a/crates/monty/src/expressions.rs +++ b/crates/monty/src/expressions.rs @@ -543,6 +543,26 @@ pub enum Node { target_position: CodeRange, value: ExprLoc, }, + /// Context manager statement: 
`with EXPR as VAR: BODY` + /// + /// Multiple `with` items are desugared to nested `With` nodes by the parser. + /// The context manager protocol calls `__enter__()` on entry and `__exit__()` + /// on exit (including exceptional exits). + With { + /// The context manager expression (e.g., `open('file')`). + context_expr: ExprLoc, + /// Optional target variable for `as VAR` clause. + /// If `None`, the result of `__enter__()` is discarded. + optional_var: Option, + /// Hidden local namespace slot for storing the context manager object. + /// + /// This is needed because `__exit__` must be called on the original context manager, + /// not the value returned by `__enter__` (they may differ). The slot is allocated + /// during the prepare phase. `None` in the parsed form, `Some` after prepare. + manager_slot: Option, + /// The body of the `with` block. + body: Vec, + }, For { /// Loop target - either a single identifier or tuple unpacking pattern. target: UnpackTarget, diff --git a/crates/monty/src/heap.rs b/crates/monty/src/heap.rs index 8757f7102..77648a1fa 100644 --- a/crates/monty/src/heap.rs +++ b/crates/monty/src/heap.rs @@ -26,7 +26,7 @@ use crate::{ types::{ Bytes, Dataclass, Dict, DictItemsView, DictKeysView, DictValuesView, FrozenSet, List, LongInt, Module, MontyIter, NamedTuple, Path, Range, ReMatch, RePattern, Set, Slice, Str, TimeZone, Tuple, allocate_tuple, date, - datetime, timedelta, timezone, + datetime, file_object, timedelta, timezone, }, value::Value, }; @@ -234,6 +234,7 @@ impl<'a, T: ResourceTracker> HeapReader<'a, T> { HeapData::DateTime(d) => HeapReadOutput::DateTime(heap_read(base, d, readers)), HeapData::TimeDelta(d) => HeapReadOutput::TimeDelta(heap_read(base, d, readers)), HeapData::TimeZone(d) => HeapReadOutput::TimeZone(heap_read(base, d, readers)), + HeapData::FileObject(f) => HeapReadOutput::FileObject(heap_read(base, f, readers)), } } @@ -319,6 +320,7 @@ pub enum HeapReadOutput<'a> { DateTime(HeapRead<'a, datetime::DateTime>), 
TimeDelta(HeapRead<'a, timedelta::TimeDelta>), TimeZone(HeapRead<'a, timezone::TimeZone>), + FileObject(HeapRead<'a, file_object::FileObject>), } pub struct HeapRead<'a, T: ?Sized> { diff --git a/crates/monty/src/heap_data.rs b/crates/monty/src/heap_data.rs index a7bf53c60..d8a9f1f6a 100644 --- a/crates/monty/src/heap_data.rs +++ b/crates/monty/src/heap_data.rs @@ -12,9 +12,9 @@ use crate::{ heap::{DropWithHeap, HeapId, HeapItem, HeapReadOutput}, intern::FunctionId, types::{ - Bytes, Dataclass, Dict, DictItemsView, DictKeysView, DictValuesView, FrozenSet, List, LongInt, Module, - MontyIter, NamedTuple, Path, PyTrait, Range, ReMatch, RePattern, Set, Slice, Str, Tuple, Type, date, datetime, - dict_view::DictView, timedelta, timezone, + Bytes, Dataclass, Dict, DictItemsView, DictKeysView, DictValuesView, FileObject, FrozenSet, List, LongInt, + Module, MontyIter, NamedTuple, Path, PyTrait, Range, ReMatch, RePattern, Set, Slice, Str, Tuple, Type, date, + datetime, dict_view::DictView, timedelta, timezone, }, value::{EitherStr, Value}, }; @@ -123,6 +123,11 @@ pub(crate) enum HeapData { TimeDelta(timedelta::TimeDelta), /// A fixed-offset `datetime.timezone` value. TimeZone(timezone::TimeZone), + /// A file object from `open()`, holding eagerly-loaded content or write buffer. + /// + /// Implements the context manager protocol and file I/O methods. + /// Leaf type: no heap references, not GC-tracked. 
+ FileObject(FileObject), } impl HeapData { @@ -240,6 +245,7 @@ impl HeapData { Self::DateTime(_) => Type::DateTime, Self::TimeDelta(_) => Type::TimeDelta, Self::TimeZone(_) => Type::TimeZone, + Self::FileObject(_) => Type::TextIOWrapper, } } @@ -276,6 +282,7 @@ impl HeapData { Self::DateTime(d) => d.py_estimate_size(), Self::TimeDelta(d) => d.py_estimate_size(), Self::TimeZone(d) => d.py_estimate_size(), + Self::FileObject(f) => f.py_estimate_size(), } } } @@ -450,6 +457,7 @@ impl<'h> PyTrait<'h> for HeapReadOutput<'h> { Self::RePattern(p) => p.py_bool(vm), Self::TimeDelta(td) => td.py_bool(vm), Self::Date(_) | Self::DateTime(_) | Self::TimeZone(_) => true, + Self::FileObject(f) => f.py_bool(vm), } } @@ -479,6 +487,7 @@ impl<'h> PyTrait<'h> for HeapReadOutput<'h> { HeapReadOutput::TimeDelta(td) => Ok(td.py_call_attr(self_id, vm, attr, args)?), HeapReadOutput::Date(d) => Ok(d.py_call_attr(self_id, vm, attr, args)?), HeapReadOutput::DateTime(dt) => Ok(dt.py_call_attr(self_id, vm, attr, args)?), + HeapReadOutput::FileObject(f) => Ok(f.py_call_attr(self_id, vm, attr, args)?), // Types without methods — return AttributeError _ => { args.drop_with_heap(vm); @@ -518,6 +527,7 @@ impl<'h> PyTrait<'h> for HeapReadOutput<'h> { Self::DateTime(d) => d.py_type(vm), Self::TimeDelta(d) => d.py_type(vm), Self::TimeZone(d) => d.py_type(vm), + Self::FileObject(f) => f.py_type(vm), } } @@ -715,6 +725,7 @@ impl<'h> PyTrait<'h> for HeapReadOutput<'h> { Self::DateTime(d) => d.py_repr_fmt(f, vm, heap_ids), Self::TimeDelta(d) => d.py_repr_fmt(f, vm, heap_ids), Self::TimeZone(d) => d.py_repr_fmt(f, vm, heap_ids), + Self::FileObject(file) => file.py_repr_fmt(f, vm, heap_ids), } } @@ -913,6 +924,7 @@ impl<'h> PyTrait<'h> for HeapReadOutput<'h> { Self::Date(d) => d.py_getattr(attr, vm), Self::DateTime(dt) => dt.py_getattr(attr, vm), Self::TimeDelta(td) => td.py_getattr(attr, vm), + Self::FileObject(f) => f.py_getattr(attr, vm), _ => Ok(None), } } diff --git a/crates/monty/src/intern.rs 
b/crates/monty/src/intern.rs index 77e9b2e6c..dd826be8e 100644 --- a/crates/monty/src/intern.rs +++ b/crates/monty/src/intern.rs @@ -581,6 +581,15 @@ pub enum StaticStrings { Finditer, /// `match.groupdict()` method Groupdict, + + // ========================== + // Context manager protocol + /// `__enter__` method for context managers + #[strum(serialize = "__enter__")] + DunderEnter, + /// `__exit__` method for context managers + #[strum(serialize = "__exit__")] + DunderExit, } impl StaticStrings { diff --git a/crates/monty/src/object.rs b/crates/monty/src/object.rs index 45956bcb9..9065c2808 100644 --- a/crates/monty/src/object.rs +++ b/crates/monty/src/object.rs @@ -325,6 +325,21 @@ pub enum MontyObject { /// Optional docstring for the function. docstring: Option, }, + /// File data returned by the host after an `open()` OsCall. + /// + /// For read mode, `content` contains the file content. + /// For write mode, `content` is empty (writes are accumulated in the `FileObject`). + /// + /// This is input-only: when the host resolves `OsFunction::FileOpen`, it returns this + /// variant which the VM converts to a `FileObject` on the heap. + FileData { + /// The file path. + path: String, + /// The file mode (e.g., `"r"`, `"w"`). + mode: String, + /// The file content (populated for read mode, empty for write mode). + content: String, + }, /// Fallback for values that cannot be represented as other variants. /// /// Contains the `repr()` string of the original value. 
@@ -537,6 +552,14 @@ impl MontyObject { Ok(Value::Ref(vm.heap.allocate(HeapData::ExtFunction(name))?)) } } + Self::FileData { path, mode, content } => { + use crate::types::file_object::FileObject; + let file = match mode.as_str() { + "w" => FileObject::new_write(path), + _ => FileObject::new_read(path, content), + }; + Ok(Value::Ref(vm.heap.allocate(HeapData::FileObject(file))?)) + } Self::Repr(_) => Err(InvalidInputError::invalid_type("'Repr' is not a valid input value")), Self::Cycle(_, _) => Err(InvalidInputError::invalid_type("'Cycle' is not a valid input value")), } @@ -736,6 +759,7 @@ impl MontyObject { name: name.clone(), docstring: None, }, + HeapData::FileObject(_) => repr_or_error(object, vm), }; // Remove from visited set after processing @@ -994,6 +1018,7 @@ impl MontyObject { Self::Type(t) => write!(f, ""), Self::BuiltinFunction(func) => write!(f, ""), Self::Function { name, .. } => write!(f, ""), + Self::FileData { path, mode, .. } => write!(f, "<_io.TextIOWrapper name='{path}' mode='{mode}'>"), Self::Repr(s) => write!(f, "Repr({})", StringRepr(s)), Self::Cycle(_, placeholder) => f.write_str(placeholder), } @@ -1032,6 +1057,7 @@ impl MontyObject { Self::Exception { .. } => true, Self::Path(_) => true, // Path instances are always truthy Self::Dataclass { .. } => true, // Dataclass instances are always truthy + Self::FileData { .. } => true, Self::Type(_) | Self::BuiltinFunction(_) | Self::Function { .. } | Self::Repr(_) | Self::Cycle(_, _) => { true } @@ -1067,6 +1093,7 @@ impl MontyObject { Self::Type(_) => "type", Self::BuiltinFunction(_) => "builtin_function_or_method", Self::Function { .. } => "function", + Self::FileData { .. } => "TextIOWrapper", Self::Repr(_) => "repr", Self::Cycle(_, _) => "cycle", } diff --git a/crates/monty/src/os.rs b/crates/monty/src/os.rs index 14a10bb3c..7bd33bc14 100644 --- a/crates/monty/src/os.rs +++ b/crates/monty/src/os.rs @@ -92,6 +92,19 @@ pub enum OsFunction { /// The host should return `MontyObject::DateTime`. 
#[strum(serialize = "datetime.now")] DateTimeNow, + /// Open a file for reading or writing. + /// + /// Takes two arguments: the file path (string) and the mode (string, e.g. `"r"` or `"w"`). + /// The host should return `MontyObject::FileData` for read mode (containing the file + /// content), or `MontyObject::None` for write mode (the file will be written on close). + #[strum(serialize = "open")] + FileOpen, + /// Close a file and flush written content to disk (write mode only). + /// + /// Takes two arguments: the file path (string) and the accumulated written content (string). + /// The host should write the content to the file and return `MontyObject::None`. + #[strum(serialize = "file.close")] + FileClose, } impl TryFrom for OsFunction { diff --git a/crates/monty/src/parse.rs b/crates/monty/src/parse.rs index 53a27bf4f..c8c4d3cb7 100644 --- a/crates/monty/src/parse.rs +++ b/crates/monty/src/parse.rs @@ -195,6 +195,31 @@ impl<'a> Parser<'a> { statements.into_iter().map(|f| self.parse_statement(f)).collect() } + /// Parses `with` items, desugaring multiple items into nested `With` nodes. + /// + /// `with a() as x, b() as y: body` becomes: + /// `With { a() as x, body: [With { b() as y, body }] }` + fn parse_with_items(&mut self, items: Vec, body: Vec) -> Result { + // Desugar from right to left: innermost item wraps the body, + // each outer item wraps the previous result. 
+ let mut current_body = body; + for item in items.into_iter().rev() { + let context_expr = self.parse_expression(item.context_expr)?; + let optional_var = item + .optional_vars + .map(|vars| self.parse_identifier(*vars)) + .transpose()?; + current_body = vec![Node::With { + context_expr, + optional_var, + manager_slot: None, + body: current_body, + }]; + } + // Unwrap the single outer node + Ok(current_body.into_iter().next().expect("items was non-empty")) + } + fn parse_elif_else_clauses(&mut self, clauses: Vec) -> Result, ParseError> { let mut tail: Vec = Vec::new(); for clause in clauses.into_iter().rev() { @@ -371,18 +396,27 @@ impl<'a> Parser<'a> { let or_else = self.parse_elif_else_clauses(elif_else_clauses)?; Ok(Node::If { test, body, or_else }) } - Stmt::With(ast::StmtWith { is_async, range, .. }) => { + Stmt::With(ast::StmtWith { + is_async, + range, + items, + body, + .. + }) => { if is_async { - Err(ParseError::not_implemented( + return Err(ParseError::not_implemented( "async context managers (async with)", self.convert_range(range), - )) - } else { - Err(ParseError::not_implemented( - "context managers (with statements)", + )); + } + if items.is_empty() { + return Err(ParseError::not_implemented( + "with statement with no items", self.convert_range(range), - )) + )); } + let body = self.parse_statements(body)?; + self.parse_with_items(items, body) } Stmt::Match(m) => Err(ParseError::not_implemented( "pattern matching (match statements)", diff --git a/crates/monty/src/prepare.rs b/crates/monty/src/prepare.rs index 055eef961..e2039871e 100644 --- a/crates/monty/src/prepare.rs +++ b/crates/monty/src/prepare.rs @@ -580,6 +580,28 @@ impl<'i> Prepare<'i> { finally, })); } + Node::With { + context_expr, + optional_var, + body, + .. 
+ } => { + let context_expr = self.prepare_expression(context_expr)?; + let optional_var = optional_var.map(|var| { + self.names_assigned_in_order + .insert(self.interner.get_str(var.name_id).to_string()); + self.get_id(var).0 + }); + // Allocate a hidden local slot for the context manager. + let mgr_slot = self.allocate_anonymous_slot(); + let body = self.prepare_nodes(body)?; + new_nodes.push(Node::With { + context_expr, + optional_var, + manager_slot: Some(mgr_slot), + body, + }); + } Node::Import { names } => { // Resolve each binding identifier to get the namespace slot let resolved_names = names @@ -1604,6 +1626,17 @@ impl<'i> Prepare<'i> { )) } + /// Allocates an anonymous local namespace slot for internal use. + /// + /// Used by the `with` statement to store the context manager object so that + /// `__exit__` can be called in exception handlers. The slot has no associated + /// Python name — it's purely an implementation detail of the bytecode. + fn allocate_anonymous_slot(&mut self) -> NamespaceId { + let id = NamespaceId::new(self.namespace_size); + self.namespace_size += 1; + id + } + /// Resolves an identifier to its namespace index and scope, creating a new entry if needed. /// /// TODO This whole implementation seems ugly at best. @@ -2082,6 +2115,20 @@ fn collect_scope_info_from_node( collect_scope_info_from_node(n, global_names, nonlocal_names, assigned_names, interner); } } + Node::With { + context_expr, + optional_var, + body, + .. + } => { + collect_assigned_names_from_expr(context_expr, assigned_names, interner); + if let Some(var) = optional_var { + assigned_names.insert(interner.get_str(var.name_id).to_string()); + } + for n in body { + collect_scope_info_from_node(n, global_names, nonlocal_names, assigned_names, interner); + } + } // Import creates bindings for each module name (or alias) Node::Import { names, .. 
} => { for import_name in names { @@ -2392,6 +2439,12 @@ fn collect_cell_vars_from_node( collect_cell_vars_from_node(n, our_locals, cell_vars, interner); } } + Node::With { context_expr, body, .. } => { + collect_cell_vars_from_expr(context_expr, our_locals, cell_vars, interner); + for n in body { + collect_cell_vars_from_node(n, our_locals, cell_vars, interner); + } + } // Handle expressions that may contain lambdas Node::Expr(expr) | Node::Return(expr) => { collect_cell_vars_from_expr(expr, our_locals, cell_vars, interner); @@ -2754,6 +2807,12 @@ fn collect_referenced_names_from_node(node: &ParseNode, referenced: &mut AHashSe collect_referenced_names_from_node(n, referenced, interner); } } + Node::With { context_expr, body, .. } => { + collect_referenced_names_from_expr(context_expr, referenced, interner); + for n in body { + collect_referenced_names_from_node(n, referenced, interner); + } + } // Imports create bindings but don't reference names Node::Import { .. } | Node::ImportFrom { .. } => {} Node::Pass diff --git a/crates/monty/src/types/file_object.rs b/crates/monty/src/types/file_object.rs new file mode 100644 index 000000000..90ce7085a --- /dev/null +++ b/crates/monty/src/types/file_object.rs @@ -0,0 +1,344 @@ +//! Python file object type implementation (`_io.TextIOWrapper`). +//! +//! Provides an eagerly-loaded file object returned by `open()`. The file content +//! is read entirely into memory via an `OsCall` when `open()` is called. Read +//! operations (`read()`, `readline()`) return data from the in-memory buffer. +//! Write operations accumulate data that is flushed to the host on `close()`. +//! +//! The `FileObject` implements the context manager protocol (`__enter__`/`__exit__`) +//! so it can be used with `with` statements. 
+ +use std::{fmt::Write, mem}; + +use ahash::AHashSet; + +use crate::{ + ResourceTracker, + args::ArgValues, + bytecode::{CallResult, VM}, + exception_private::{ExcType, RunResult, SimpleException}, + heap::{DropWithHeap, HeapData, HeapId, HeapItem, HeapRead}, + os::OsFunction, + resource::ResourceError, + types::{PyTrait, Type, str::allocate_string}, + value::{EitherStr, Value}, +}; + +/// The file mode: read or write. +#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +pub(crate) enum FileMode { + /// Read mode (`'r'`). Content is loaded eagerly from the host. + Read, + /// Write mode (`'w'`). Written data is accumulated and flushed on close. + Write, +} + +impl FileMode { + /// Returns the Python mode string for this file mode. + fn as_str(self) -> &'static str { + match self { + Self::Read => "r", + Self::Write => "w", + } + } +} + +/// A file object holding eagerly-loaded content (for reads) or accumulated writes. +/// +/// Created by the `open()` builtin via an `OsCall`. For read mode, the host reads +/// the entire file and passes the content back. For write mode, writes are buffered +/// in memory and flushed to the host when the file is closed. +/// +/// Implements the context manager protocol: `__enter__` returns self, `__exit__` +/// calls `close()`. This ensures files opened with `with open(...) as f:` are +/// properly closed even if an exception occurs. +#[derive(Debug, serde::Serialize, serde::Deserialize)] +pub(crate) struct FileObject { + /// The file path (used in repr and for write-mode flush). + path: Box, + /// The file mode (read or write). + mode: FileMode, + /// File content (eagerly loaded for read mode, empty for write mode). + content: Box, + /// Current read position (byte offset into `content`). + position: usize, + /// Whether the file has been closed. + closed: bool, + /// Accumulated write data (only used in write mode). 
+ written: Option, +} + +impl FileObject { + /// Creates a new read-mode file object with the given content. + pub fn new_read(path: String, content: String) -> Self { + Self { + path: path.into_boxed_str(), + mode: FileMode::Read, + content: content.into_boxed_str(), + position: 0, + closed: false, + written: None, + } + } + + /// Creates a new write-mode file object. + pub fn new_write(path: String) -> Self { + Self { + path: path.into_boxed_str(), + mode: FileMode::Write, + content: String::new().into_boxed_str(), + position: 0, + closed: false, + written: Some(String::new()), + } + } +} + +impl HeapItem for FileObject { + fn py_estimate_size(&self) -> usize { + mem::size_of::() + self.path.len() + self.content.len() + self.written.as_ref().map_or(0, String::len) + } + + fn py_dec_ref_ids(&mut self, _stack: &mut Vec) { + // FileObject contains no heap references + } +} + +impl<'h> HeapRead<'h, FileObject> { + /// Reads all remaining content from the current position. + /// + /// Returns the content from the current read position to the end, + /// then advances the position to the end. + fn read_all(&mut self, vm: &mut VM<'h, '_, impl ResourceTracker>) -> RunResult { + let file = self.get_mut(vm.heap); + check_closed(file)?; + check_readable(file)?; + let remaining = file.content[file.position..].to_owned(); + file.position = file.content.len(); + allocate_string(remaining, vm.heap) + } + + /// Reads the next line from the file. + /// + /// Returns the next line including the trailing newline (if present). + /// Returns an empty string at EOF. 
+    fn readline(&mut self, vm: &mut VM<'h, '_, impl ResourceTracker>) -> RunResult<Value> {
+        let file = self.get_mut(vm.heap);
+        check_closed(file)?;
+        check_readable(file)?;
+        let remaining = &file.content[file.position..];
+        let line_end = remaining.find('\n').map_or(remaining.len(), |pos| pos + 1);
+        let line = remaining[..line_end].to_owned();
+        file.position += line_end;
+        allocate_string(line, vm.heap)
+    }
+
+    /// Appends `data` to the file's in-memory write buffer.
+    ///
+    /// Returns the number of characters written (Unicode scalar values, not UTF-8 bytes).
+    fn write_str(&mut self, vm: &mut VM<'h, '_, impl ResourceTracker>, data: &str) -> RunResult<Value> {
+        let file = self.get_mut(vm.heap);
+        check_closed(file)?;
+        check_writable(file)?;
+        let char_count = data.chars().count();
+        file.written
+            .as_mut()
+            .expect("write buffer exists in write mode")
+            .push_str(data);
+        Ok(Value::Int(i64::try_from(char_count).unwrap_or(i64::MAX)))
+    }
+
+    /// Closes the file.
+    ///
+    /// For read-mode files, this simply marks the file as closed.
+    /// For write-mode files, this returns an `OsCall` to flush the accumulated
+    /// writes to the host filesystem.
+ fn close(&mut self, vm: &mut VM<'h, '_, impl ResourceTracker>) -> RunResult { + let file = self.get_mut(vm.heap); + if file.closed { + return Ok(CallResult::Value(Value::None)); + } + file.closed = true; + if file.mode == FileMode::Write { + let written = file.written.take().unwrap_or_default(); + let path_str = file.path.to_string(); + let path_value = allocate_string(path_str, vm.heap)?; + let content_value = allocate_string(written, vm.heap)?; + Ok(CallResult::OsCall( + OsFunction::FileClose, + ArgValues::Two(path_value, content_value), + )) + } else { + Ok(CallResult::Value(Value::None)) + } + } +} + +impl<'h> PyTrait<'h> for HeapRead<'h, FileObject> { + fn py_type(&self, _vm: &VM<'h, '_, impl ResourceTracker>) -> Type { + Type::TextIOWrapper + } + + fn py_len(&self, _vm: &VM<'h, '_, impl ResourceTracker>) -> Option { + None + } + + fn py_eq(&self, _other: &Self, _vm: &mut VM<'h, '_, impl ResourceTracker>) -> Result { + // File objects use identity comparison (same as CPython) + Ok(false) + } + + fn py_bool(&self, _vm: &mut VM<'h, '_, impl ResourceTracker>) -> bool { + true + } + + fn py_repr_fmt( + &self, + f: &mut impl Write, + vm: &VM<'h, '_, impl ResourceTracker>, + _heap_ids: &mut AHashSet, + ) -> RunResult<()> { + let file = self.get(vm.heap); + Ok(write!( + f, + "<_io.TextIOWrapper name='{}' mode='{}'>", + file.path, + file.mode.as_str() + )?) 
+    }
+
+    /// Dispatches a method call on the file object by attribute name.
+    ///
+    /// Handles `read`, `readline`, `write`, `close`, `__enter__`, `__exit__`,
+    /// `readable` and `writable`; any other name raises `AttributeError`.
+    /// `close` and `__exit__` may return an `OsCall` instead of a plain value
+    /// when a write-mode file has to be flushed by the host.
+    fn py_call_attr(
+        &mut self,
+        self_id: HeapId,
+        vm: &mut VM<'h, '_, impl ResourceTracker>,
+        attr: &EitherStr,
+        args: ArgValues,
+    ) -> RunResult<CallResult> {
+        let method = attr.as_str(vm.interns);
+        match method {
+            "read" => {
+                args.check_zero_args("read", vm.heap)?;
+                Ok(CallResult::Value(self.read_all(vm)?))
+            }
+            "readline" => {
+                args.check_zero_args("readline", vm.heap)?;
+                Ok(CallResult::Value(self.readline(vm)?))
+            }
+            "write" => {
+                let data = args.get_one_arg("write", vm.heap)?;
+                // Copy the text out and release `data` before propagating any error, so no path leaks its reference
+                let text = get_str_arg(&data, vm, "write").map(str::to_owned);
+                data.drop_with_heap(vm);
+                let result = self.write_str(vm, &text?)?;
+                Ok(CallResult::Value(result))
+            }
+            "close" => {
+                args.check_zero_args("close", vm.heap)?;
+                self.close(vm)
+            }
+            "__enter__" => {
+                args.check_zero_args("__enter__", vm.heap)?;
+                // Increment refcount because we're returning a new reference to self
+                vm.heap.inc_ref(self_id);
+                Ok(CallResult::Value(Value::Ref(self_id)))
+            }
+            "__exit__" => {
+                // __exit__ receives (exc_type, exc_val, exc_tb) — we ignore them
+                // and just close the file. Always returns False (never suppresses).
+                args.drop_with_heap(vm);
+                let close_result = self.close(vm)?;
+                // For read mode, close returns Value(None) — we return False.
+                // For write mode, close returns OsCall to flush — the host returns
+                // None which is falsy, matching __exit__'s "don't suppress" contract.
+ match close_result { + CallResult::Value(_) => Ok(CallResult::Value(Value::Bool(false))), + other => Ok(other), + } + } + "readable" => { + args.check_zero_args("readable", vm.heap)?; + let file = self.get(vm.heap); + Ok(CallResult::Value(Value::Bool(file.mode == FileMode::Read))) + } + "writable" => { + args.check_zero_args("writable", vm.heap)?; + let file = self.get(vm.heap); + Ok(CallResult::Value(Value::Bool(file.mode == FileMode::Write))) + } + _ => { + args.drop_with_heap(vm); + Err(ExcType::attribute_error(Type::TextIOWrapper, method)) + } + } + } + + fn py_getattr(&self, attr: &EitherStr, vm: &mut VM<'h, '_, impl ResourceTracker>) -> RunResult> { + let name = attr.as_str(vm.interns); + match name { + "closed" => { + let file = self.get(vm.heap); + Ok(Some(CallResult::Value(Value::Bool(file.closed)))) + } + "name" => { + let file = self.get(vm.heap); + let name_str = file.path.to_string(); + let value = allocate_string(name_str, vm.heap)?; + Ok(Some(CallResult::Value(value))) + } + "mode" => { + let file = self.get(vm.heap); + let mode_str = file.mode.as_str().to_owned(); + let value = allocate_string(mode_str, vm.heap)?; + Ok(Some(CallResult::Value(value))) + } + _ => Err(ExcType::attribute_error(Type::TextIOWrapper, name)), + } + } +} + +/// Checks that the file is not closed, raising `ValueError` if it is. +fn check_closed(file: &FileObject) -> RunResult<()> { + if file.closed { + Err(SimpleException::new_msg(ExcType::ValueError, "I/O operation on closed file.").into()) + } else { + Ok(()) + } +} + +/// Checks that the file is in read mode, raising `ValueError` if not. +fn check_readable(file: &FileObject) -> RunResult<()> { + if file.mode == FileMode::Read { + Ok(()) + } else { + Err(SimpleException::new_msg(ExcType::ValueError, "not readable").into()) + } +} + +/// Checks that the file is in write mode, raising `ValueError` if not. 
+fn check_writable(file: &FileObject) -> RunResult<()> { + if file.mode == FileMode::Write { + Ok(()) + } else { + Err(SimpleException::new_msg(ExcType::ValueError, "not writable").into()) + } +} + +/// Extracts a `&str` from a `Value`, raising `TypeError` if it's not a string. +fn get_str_arg<'a>(value: &'a Value, vm: &'a VM<'_, '_, impl ResourceTracker>, method: &str) -> RunResult<&'a str> { + match value { + Value::InternString(id) => Ok(vm.interns.get_str(*id)), + Value::Ref(id) => match vm.heap.get(*id) { + HeapData::Str(s) => Ok(s.as_str()), + other => Err(SimpleException::new_msg( + ExcType::TypeError, + format!("{method}() argument must be str, not {}", other.py_type()), + ) + .into()), + }, + _ => Err(SimpleException::new_msg( + ExcType::TypeError, + format!("{method}() argument must be str, not {}", value.py_type(vm)), + ) + .into()), + } +} diff --git a/crates/monty/src/types/mod.rs b/crates/monty/src/types/mod.rs index 1f9b8024e..0bc745a41 100644 --- a/crates/monty/src/types/mod.rs +++ b/crates/monty/src/types/mod.rs @@ -11,6 +11,7 @@ pub mod date; pub mod datetime; pub mod dict; pub mod dict_view; +pub mod file_object; pub mod iter; pub mod list; pub mod long_int; @@ -34,6 +35,7 @@ pub(crate) use bytes::Bytes; pub(crate) use dataclass::Dataclass; pub(crate) use dict::Dict; pub(crate) use dict_view::{DictItemsView, DictKeysView, DictValuesView}; +pub(crate) use file_object::FileObject; pub(crate) use iter::MontyIter; pub(crate) use list::List; pub(crate) use long_int::LongInt; diff --git a/crates/monty/test_cases/with__basic.py b/crates/monty/test_cases/with__basic.py new file mode 100644 index 000000000..229f025bf --- /dev/null +++ b/crates/monty/test_cases/with__basic.py @@ -0,0 +1,44 @@ +# call-external +from pathlib import Path + +# Create test files +Path('/virtual/test_with.txt').write_text('hello world') +Path('/virtual/test_with2.txt').write_text('second file') + +# === Basic with statement + open() === +with open('/virtual/test_with.txt') as f: 
+ content = f.read() + assert content == 'hello world', f'expected hello world, got {content!r}' + +# f should be closed after with block +assert f.closed, 'file should be closed after with block' + +# === with statement without as clause === +with open('/virtual/test_with.txt'): + x = 42 +assert x == 42, 'body executed' + +# === Nested with statements === +with open('/virtual/test_with.txt') as f1: + with open('/virtual/test_with2.txt') as f2: + c1 = f1.read() + c2 = f2.read() + assert c1 == 'hello world', f'nested: expected hello world, got {c1!r}' + assert c2 == 'second file', f'nested: expected second file, got {c2!r}' + +assert f1.closed, 'f1 should be closed after nested with' +assert f2.closed, 'f2 should be closed after nested with' + +# === File attributes === +with open('/virtual/test_with.txt') as f: + assert f.name == '/virtual/test_with.txt', f'name mismatch: {f.name!r}' + assert f.mode == 'r', f'mode mismatch: {f.mode!r}' + assert not f.closed, 'should not be closed inside with' + +# === readline === +Path('/virtual/lines.txt').write_text('line1\nline2\nline3') +with open('/virtual/lines.txt') as f: + assert f.readline() == 'line1\n', 'first line' + assert f.readline() == 'line2\n', 'second line' + assert f.readline() == 'line3', 'third line (no trailing newline)' + assert f.readline() == '', 'EOF returns empty string' diff --git a/crates/monty/tests/datatest_runner.rs b/crates/monty/tests/datatest_runner.rs index 8c6be5671..5eb94dd6e 100644 --- a/crates/monty/tests/datatest_runner.rs +++ b/crates/monty/tests/datatest_runner.rs @@ -1060,6 +1060,52 @@ fn dispatch_os_call( .into() } } + OsFunction::FileOpen => { + // args[0] is path, args[1] is mode + let mode = String::try_from(&args[1]).expect("open: second arg must be mode string"); + if mode == "r" { + // Read mode: return file content + if let Some(file) = get_virtual_file(&path) { + match str::from_utf8(&file.content) { + Ok(text) => MontyObject::FileData { + path, + mode, + content: 
text.to_owned(), + } + .into(), + Err(_) => MontyException::new( + ExcType::UnicodeDecodeError, + Some("'utf-8' codec can't decode bytes".to_owned()), + ) + .into(), + } + } else { + MontyException::new( + ExcType::FileNotFoundError, + Some(format!("[Errno 2] No such file or directory: '{path}'")), + ) + .into() + } + } else { + // Write mode: return empty FileData (content is accumulated in VM) + MontyObject::FileData { + path, + mode, + content: String::new(), + } + .into() + } + } + OsFunction::FileClose => { + // args[0] is path, args[1] is written content + let content = String::try_from(&args[1]).expect("file.close: second arg must be content string"); + MUTABLE_VFS.with(|vfs| { + let mut vfs = vfs.borrow_mut(); + vfs.files.insert(path.clone(), (content.into_bytes(), 0o644)); + vfs.deleted_files.remove(&path); + }); + MontyObject::None.into() + } } } diff --git a/crates/monty/tests/os_tests.rs b/crates/monty/tests/os_tests.rs index 638b56fba..48471772a 100644 --- a/crates/monty/tests/os_tests.rs +++ b/crates/monty/tests/os_tests.rs @@ -54,6 +54,12 @@ fn run_to_oscall(code: &str) -> (OsFunction, Vec) { offset_seconds: None, timezone_name: None, }), + OsFunction::FileOpen => MontyObject::FileData { + path: "mock".to_owned(), + mode: "r".to_owned(), + content: "mock content".to_owned(), + }, + OsFunction::FileClose => MontyObject::None, }; let function = call.function; let args = call.args.clone(); diff --git a/crates/monty/tests/parse_errors.rs b/crates/monty/tests/parse_errors.rs index 1f62b0069..d2fcea74b 100644 --- a/crates/monty/tests/parse_errors.rs +++ b/crates/monty/tests/parse_errors.rs @@ -53,8 +53,8 @@ fn unknown_imports_compile_successfully_error_deferred_to_runtime() { } #[test] -fn with_statement_returns_not_implemented_error() { - let result = MontyRun::new("with open('f') as f: pass".to_owned(), "test.py", vec![]); +fn async_with_statement_returns_not_implemented_error() { + let result = MontyRun::new("async with open('f') as f: pass".to_owned(), 
"test.py", vec![]); assert_eq!(get_exc_type(result), ExcType::NotImplementedError); } diff --git a/scripts/iter_test_methods.py b/scripts/iter_test_methods.py index c9b46a488..f00c0f3e6 100644 --- a/scripts/iter_test_methods.py +++ b/scripts/iter_test_methods.py @@ -11,6 +11,8 @@ from __future__ import annotations +import builtins +import io import os import stat as stat_module from dataclasses import dataclass @@ -525,6 +527,62 @@ def items(self): os.environ = VirtualEnviron() +# ============================================================================= +# Virtual open() for context manager tests +# ============================================================================= + +if not hasattr(builtins, '_monty_original_open'): + builtins._monty_original_open = builtins.open # pyright: ignore[reportAttributeAccessIssue] + +_original_open = builtins._monty_original_open # pyright: ignore[reportAttributeAccessIssue,reportUnknownVariableType,reportUnknownMemberType] + + +def _virtual_open(file: str, mode: str = 'r', **kwargs: object) -> object: + """Virtual open() that reads/writes from the virtual filesystem for /virtual paths. + + For paths starting with '/virtual', uses the VirtualPath filesystem. + For all other paths, falls through to the real open(). 
+ """ + if file.startswith('/virtual') or file.startswith('/nonexistent'): + if mode == 'r': + # Read from virtual filesystem + vpath = VirtualPath(file) + if not vpath.exists(): + raise FileNotFoundError(f"[Errno 2] No such file or directory: '{file}'") + content = vpath.read_text() + sio = io.StringIO(content) + sio.name = file + sio.mode = 'r' # pyright: ignore[reportAttributeAccessIssue] + return sio + if mode == 'w': + # Return a writable StringIO that flushes to VFS on close + class VirtualWriteFile(io.StringIO): + """StringIO wrapper that writes to virtual filesystem on close.""" + + def __init__(self, path: str) -> None: + super().__init__() + self._vpath = path + + def close(self) -> None: + if not self.closed: + VirtualPath(self._vpath).write_text(self.getvalue()) + super().close() + + def __enter__(self) -> 'VirtualWriteFile': + return self + + def __exit__(self, *args: object) -> bool: # pyright: ignore[reportIncompatibleMethodOverride] + self.close() + return False + + return VirtualWriteFile(file) + raise ValueError(f"invalid mode: '{mode}'") + return _original_open(file, mode, **kwargs) # pyright: ignore[reportUnknownVariableType,reportCallIssue,reportArgumentType] + + +builtins.open = _virtual_open + + # All external functions available to iter mode tests ITER_MODE_GLOBALS: dict[str, object] = { 'add_ints': add_ints,