From 788b844c836d68439c660649a86f8e11a0bf7d53 Mon Sep 17 00:00:00 2001 From: nick evans Date: Mon, 28 Oct 2024 15:41:01 -0400 Subject: [PATCH 1/3] Add support for ruby 3.2 Data objects --- lib/psych/class_loader.rb | 1 + lib/psych/visitors/to_ruby.rb | 22 +++++++++++++++ lib/psych/visitors/yaml_tree.rb | 10 +++++++ test/psych/test_object_references.rb | 5 ++++ test/psych/test_safe_load.rb | 32 ++++++++++++++++++++++ test/psych/test_yaml.rb | 39 +++++++++++++++++++++++++++ test/psych/visitors/test_yaml_tree.rb | 21 +++++++++++++++ 7 files changed, 130 insertions(+) diff --git a/lib/psych/class_loader.rb b/lib/psych/class_loader.rb index 50efc35e..c8f50972 100644 --- a/lib/psych/class_loader.rb +++ b/lib/psych/class_loader.rb @@ -6,6 +6,7 @@ module Psych class ClassLoader # :nodoc: BIG_DECIMAL = 'BigDecimal' COMPLEX = 'Complex' + DATA = 'Data' unless RUBY_VERSION < "3.2" DATE = 'Date' DATE_TIME = 'DateTime' EXCEPTION = 'Exception' diff --git a/lib/psych/visitors/to_ruby.rb b/lib/psych/visitors/to_ruby.rb index f0b4a94e..d1c7d668 100644 --- a/lib/psych/visitors/to_ruby.rb +++ b/lib/psych/visitors/to_ruby.rb @@ -197,6 +197,14 @@ def visit_Psych_Nodes_Mapping o s end + when /^!ruby\/data(?::(.*))?$/ + data = register(o, resolve_class($1).allocate) if $1 + members = {} + revive_data_members(members, o) + data ||= allocate_anon_data(o, members) + data.send(:initialize, **members) + data + when /^!ruby\/object:?(.*)?$/ name = $1 || 'Object' @@ -340,6 +348,20 @@ def register_empty object list end + def allocate_anon_data node, members + klass = class_loader.data.define(*members.keys) + register(node, klass.allocate) + end + + def revive_data_members hash, o + o.children.each_slice(2) do |k,v| + name = accept(k) + value = accept(v) + hash[class_loader.symbolize(name)] = value + end + hash + end + def revive_hash hash, o, tagged= false o.children.each_slice(2) { |k,v| key = accept(k) diff --git a/lib/psych/visitors/yaml_tree.rb b/lib/psych/visitors/yaml_tree.rb index e27c9279..6b878eee 100644 --- a/lib/psych/visitors/yaml_tree.rb +++ b/lib/psych/visitors/yaml_tree.rb @@ -162,6 +162,16 @@ def visit_Object o alias :visit_Delegator :visit_Object + def visit_Data o + tag = ['!ruby/data', o.class.name].compact.join(':') + register o, @emitter.start_mapping(nil, tag, false, Nodes::Mapping::BLOCK) + o.members.each do |member| + @emitter.scalar member.to_s, nil, nil, true, false, Nodes::Scalar::ANY + accept o.send member + end + @emitter.end_mapping + end + def visit_Struct o tag = ['!ruby/struct', o.class.name].compact.join(':') diff --git a/test/psych/test_object_references.rb b/test/psych/test_object_references.rb index 86bb9034..0498d54e 100644 --- a/test/psych/test_object_references.rb +++ b/test/psych/test_object_references.rb @@ -31,6 +31,11 @@ def test_struct_has_references assert_reference_trip Struct.new(:foo).new(1) end + def test_data_has_references + omit "Data requires ruby >= 3.2" if RUBY_VERSION < "3.2" + assert_reference_trip Data.define(:foo).new(1) + end + def assert_reference_trip obj yml = Psych.dump([obj, obj]) assert_match(/\*-?\d+/, yml) diff --git a/test/psych/test_safe_load.rb b/test/psych/test_safe_load.rb index a9ed7375..e6ca1e14 100644 --- a/test/psych/test_safe_load.rb +++ b/test/psych/test_safe_load.rb @@ -114,6 +114,38 @@ def test_anon_struct end end + D = Data.define(:d) unless RUBY_VERSION < "3.2" + + def test_data_depends_on_sym + omit "Data requires ruby >= 3.2" if RUBY_VERSION < "3.2" + assert_safe_cycle(D.new(nil), permitted_classes: [D, Symbol]) + assert_raise(Psych::DisallowedClass) do + cycle D.new(nil), permitted_classes: [D] + end + end + + def test_anon_data + omit "Data requires ruby >= 3.2" if RUBY_VERSION < "3.2" + assert Psych.safe_load(<<-eoyml, permitted_classes: [Data, Symbol]) +--- !ruby/data + foo: bar + eoyml + + assert_raise(Psych::DisallowedClass) do + Psych.safe_load(<<-eoyml, permitted_classes: [Data]) +--- !ruby/data + foo: bar + eoyml + end + + assert_raise(Psych::DisallowedClass) do + Psych.safe_load(<<-eoyml, permitted_classes: [Symbol]) +--- !ruby/data + foo: bar + eoyml + end + end + def test_safe_load_default_fallback assert_nil Psych.safe_load("") end diff --git a/test/psych/test_yaml.rb b/test/psych/test_yaml.rb index 897a7c89..fef2c781 100644 --- a/test/psych/test_yaml.rb +++ b/test/psych/test_yaml.rb @@ -6,6 +6,7 @@ # [ruby-core:01946] module Psych_Tests StructTest = Struct::new( :c ) + DataTest = Data.define( :c ) unless RUBY_VERSION < "3.2" end class Psych_Unit_Tests < Psych::TestCase @@ -1071,6 +1072,44 @@ def test_ruby_struct end + def test_ruby_data + omit "Data requires ruby >= 3.2" if RUBY_VERSION < "3.2" + Object.remove_const :MyBookData if Object.const_defined?(:MyBookData) + # Ruby Data value objects + book_class = Data.define(:author, :title, :year, :isbn) + Object.const_set(:MyBookData, book_class) + assert_to_yaml( + [ book_class.new( "Yukihiro Matsumoto", "Ruby in a Nutshell", 2002, "0-596-00214-9" ), + book_class.new( [ 'Dave Thomas', 'Andy Hunt' ], "The Pickaxe", 2002, + book_class.new( "This should be the ISBN", "but I have more data here", 2002, "None" ) + ) + ], <= 3.2" if RUBY_VERSION < "3.2" + assert_cycle D.new('bar') + end + + def test_data_anon + omit "Data requires ruby >= 3.2" if RUBY_VERSION < "3.2" + d = Data.define(:foo).new('bar') + obj = Psych.unsafe_load(Psych.dump(d)) + assert_equal d.foo, obj.foo + end + + def test_data_override_method + omit "Data requires ruby >= 3.2" if RUBY_VERSION < "3.2" + d = Data.define(:method).new('override') + obj = Psych.unsafe_load(Psych.dump(d)) + assert_equal d.method, obj.method + end + def test_exception ex = Exception.new 'foo' loaded = Psych.unsafe_load(Psych.dump(ex)) From 3b31dc998a68db11767ccda3d13fe1e588ae79ea Mon Sep 17 00:00:00 2001 From: nick evans Date: Mon, 28 Oct 2024 15:41:26 -0400 Subject: [PATCH 2/3] Add support for Data objects with ivars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This sets the ivars _before_ calling initialize, which feels wrong. But Data doesn't give us any mechanism for setting the members other than 1) initialize, or 2) drop down into the C API. Since initialize freezes the object, we need to set the ivars before that. I think this is a reasonable compromise—if users need better handling, they can implement their own `encode_with` and `init_with`. But it will lead to unhappy surprises for some users. Alternatively, we could use the C API, similarly to Marshal. Psych _is_ already using the C API for path2class and build_exception. This would be the least surprising behavior for users, I think. --- lib/psych/visitors/to_ruby.rb | 23 +++++++-- lib/psych/visitors/yaml_tree.rb | 40 +++++++++++--- test/psych/test_data.rb | 69 +++++++++++++++++++++++++ test/psych/test_serialize_subclasses.rb | 18 +++++++ 4 files changed, 141 insertions(+), 9 deletions(-) create mode 100644 test/psych/test_data.rb diff --git a/lib/psych/visitors/to_ruby.rb b/lib/psych/visitors/to_ruby.rb index d1c7d668..75132811 100644 --- a/lib/psych/visitors/to_ruby.rb +++ b/lib/psych/visitors/to_ruby.rb @@ -197,10 +197,27 @@ def visit_Psych_Nodes_Mapping o s end - when /^!ruby\/data(?::(.*))?$/ - data = register(o, resolve_class($1).allocate) if $1 + when /^!ruby\/data(-with-ivars)?(?::(.*))?$/ + data = register(o, resolve_class($2).allocate) if $2 members = {} - revive_data_members(members, o) + + if $1 # data-with-ivars + ivars = {} + o.children.each_slice(2) do |type, vars| + case accept(type) + when 'members' + revive_data_members(members, vars) + data ||= allocate_anon_data(o, members) + when 'ivars' + revive_hash(ivars, vars) + end + end + ivars.each do |ivar, v| + data.instance_variable_set ivar, v + end + else + revive_data_members(members, o) + end data ||= allocate_anon_data(o, members) data.send(:initialize, **members) data diff --git a/lib/psych/visitors/yaml_tree.rb b/lib/psych/visitors/yaml_tree.rb index 6b878eee..59fd7204 100644 --- a/lib/psych/visitors/yaml_tree.rb +++ b/lib/psych/visitors/yaml_tree.rb @@ -163,13 +163,41 @@ def visit_Object o alias :visit_Delegator :visit_Object def visit_Data o - tag = ['!ruby/data', o.class.name].compact.join(':') - register o, @emitter.start_mapping(nil, tag, false, Nodes::Mapping::BLOCK) - o.members.each do |member| - @emitter.scalar member.to_s, nil, nil, true, false, Nodes::Scalar::ANY - accept o.send member + ivars = o.instance_variables + if ivars.empty? + tag = ['!ruby/data', o.class.name].compact.join(':') + register o, @emitter.start_mapping(nil, tag, false, Nodes::Mapping::BLOCK) + o.members.each do |member| + @emitter.scalar member.to_s, nil, nil, true, false, Nodes::Scalar::ANY + accept o.send member + end + @emitter.end_mapping + + else + tag = ['!ruby/data-with-ivars', o.class.name].compact.join(':') + node = @emitter.start_mapping(nil, tag, false, Psych::Nodes::Mapping::BLOCK) + register(o, node) + + # Dump the members + accept 'members' + @emitter.start_mapping nil, nil, true, Nodes::Mapping::BLOCK + o.members.each do |member| + @emitter.scalar member.to_s, nil, nil, true, false, Nodes::Scalar::ANY + accept o.send member + end + @emitter.end_mapping + + # Dump the ivars + accept 'ivars' + @emitter.start_mapping nil, nil, true, Nodes::Mapping::BLOCK + ivars.each do |ivar| + accept ivar.to_s + accept o.instance_variable_get ivar + end + @emitter.end_mapping + + @emitter.end_mapping end - @emitter.end_mapping end def visit_Struct o diff --git a/test/psych/test_data.rb b/test/psych/test_data.rb new file mode 100644 index 00000000..a67a037b --- /dev/null +++ b/test/psych/test_data.rb @@ -0,0 +1,69 @@ +# frozen_string_literal: true +require_relative 'helper' + +class PsychDataWithIvar < Data.define(:foo) + attr_reader :bar + def initialize(**) + @bar = 'hello' + super + end +end unless RUBY_VERSION < "3.2" + +module Psych + class TestData < TestCase + class SelfReferentialData < Data.define(:foo) + attr_accessor :ref + def initialize(foo:) + @ref = self + super + end + end unless RUBY_VERSION < "3.2" + + def setup + omit "Data requires ruby >= 3.2" if RUBY_VERSION < "3.2" + end + + # TODO: move to another test? + def test_dump_data + assert_equal <<~eoyml, Psych.dump(PsychDataWithIvar["bar"]) + --- !ruby/data-with-ivars:PsychDataWithIvar + members: + foo: bar + ivars: + "@bar": hello + eoyml + end + + def test_self_referential_data + circular = SelfReferentialData.new("foo") + + loaded = Psych.unsafe_load(Psych.dump(circular)) + assert_instance_of(SelfReferentialData, loaded.ref) + + assert_equal(circular, loaded) + assert_same(loaded, loaded.ref) + end + + def test_roundtrip + thing = PsychDataWithIvar.new("bar") + data = Psych.unsafe_load(Psych.dump(thing)) + + assert_equal "hello", data.bar + assert_equal "bar", data.foo + end + + def test_load + obj = Psych.unsafe_load(<<~eoyml) + --- !ruby/data-with-ivars:PsychDataWithIvar + members: + foo: bar + ivars: + "@bar": hello + eoyml + + assert_equal "hello", obj.bar + assert_equal "bar", obj.foo + end + end +end + diff --git a/test/psych/test_serialize_subclasses.rb b/test/psych/test_serialize_subclasses.rb index 344c79b3..640c3313 100644 --- a/test/psych/test_serialize_subclasses.rb +++ b/test/psych/test_serialize_subclasses.rb @@ -35,5 +35,23 @@ def test_struct_subclass so = StructSubclass.new('foo', [1,2,3]) assert_equal so, Psych.unsafe_load(Psych.dump(so)) end + + class DataSubclass < Data.define(:foo) + def initialize(foo:) + @bar = "hello #{foo}" + super(foo: foo) + end + + def == other + super(other) && @bar == other.instance_eval{ @bar } + end + end unless RUBY_VERSION < "3.2" + + def test_data_subclass + omit "Data requires ruby >= 3.2" if RUBY_VERSION < "3.2" + so = DataSubclass.new('foo') + assert_equal so, Psych.unsafe_load(Psych.dump(so)) + end + end end From 3573fb356e5940c45fcdb40ca89ebfcca9507eeb Mon Sep 17 00:00:00 2001 From: nick evans Date: Fri, 8 Nov 2024 10:00:46 -0500 Subject: [PATCH 3/3] Use `rb_struct_initialize` to initialize Data --- ext/psych/psych_to_ruby.c | 10 ++++++++++ lib/psych/visitors/to_ruby.rb | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/ext/psych/psych_to_ruby.c b/ext/psych/psych_to_ruby.c index ffe0c69c..d473a5f8 100644 --- a/ext/psych/psych_to_ruby.c +++ b/ext/psych/psych_to_ruby.c @@ -24,6 +24,15 @@ static VALUE path2class(VALUE self, VALUE path) return rb_path_to_class(path); } +static VALUE init_struct(VALUE self, VALUE data, VALUE attrs) +{ + VALUE args = rb_ary_new2(1); + rb_ary_push(args, attrs); + rb_struct_initialize(data, args); + + return data; +} + void Init_psych_to_ruby(void) { VALUE psych = rb_define_module("Psych"); @@ -33,6 +42,7 @@ void Init_psych_to_ruby(void) VALUE visitor = rb_define_class_under(visitors, "Visitor", rb_cObject); cPsychVisitorsToRuby = rb_define_class_under(visitors, "ToRuby", visitor); + rb_define_private_method(cPsychVisitorsToRuby, "init_struct", init_struct, 2); rb_define_private_method(cPsychVisitorsToRuby, "build_exception", build_exception, 2); rb_define_private_method(class_loader, "path2class", path2class, 1); } diff --git a/lib/psych/visitors/to_ruby.rb b/lib/psych/visitors/to_ruby.rb index 75132811..01e3bd29 100644 --- a/lib/psych/visitors/to_ruby.rb +++ b/lib/psych/visitors/to_ruby.rb @@ -219,7 +219,8 @@ def visit_Psych_Nodes_Mapping o revive_data_members(members, o) end data ||= allocate_anon_data(o, members) - data.send(:initialize, **members) + init_struct(data, **members) + data.freeze data when /^!ruby\/object:?(.*)?$/