Skip to content

Commit 29e6af0

Browse files
committed
Implement unboxed_ary to avoid repeated access to packed data
Ruby objects use a lot of bit packing to save on memory, and also have multiple layouts; this is particularly true for Arrays, which can be embedded, heap allocated, or shared. This saves memory, but can cause a lot of execution overhead when the same piece of information is packed and unpacked many times. Instead, if we primarily work with a "normalized" representation of the object that we keep in sync with the actual RArray, we can save quite a bit of overhead, as showcased in the benchmark below. Of course applying this to more than a couple of functions is a lot of work and it's unclear whether it's worth it or not. ``` $ make -j benchmark ITEM="rb_ary_push" BUILT_RUBY="./miniruby" COMPARE_RUBY="./miniruby-baseline" /opt/rubies/3.3.4/bin/ruby --disable=gems -rrubygems -I../benchmark/lib ../benchmark/benchmark-driver/exe/benchmark-driver \ --executables="compare-ruby::./miniruby-baseline -I.ext/common --disable-gem" \ --executables="built-ruby::./miniruby -I../lib -I. -I.ext/common ../tool/runruby.rb --extout=.ext -- --disable-gems --disable-gem" \ --output=markdown --output-compare -v $(find ../benchmark -maxdepth 1 -name 'rb_ary_push' -o -name '*rb_ary_push*.yml' -o -name '*rb_ary_push*.rb' | sort) compare-ruby: ruby 3.4.0dev (2024-12-05T19:11:39Z ary-batch-info e9407cf) +PRISM [arm64-darwin23] built-ruby: ruby 3.4.0dev (2024-12-05T19:11:39Z ary-batch-info 46757e76db) +PRISM [arm64-darwin23] warming up.. | |compare-ruby|built-ruby| |:-------|-----------:|---------:| |64*2k | 848.804| 1.151k| | | -| 1.36x| |128*1k | 1.651k| 2.217k| | | -| 1.34x| ```
1 parent e9407cf commit 29e6af0

File tree

1 file changed

+210
-9
lines changed

1 file changed

+210
-9
lines changed

array.c

+210-9
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,9 @@ rb_ary_set_shared(VALUE ary, VALUE shared_root)
521521
/*
 * Diff view of rb_ary_modify_check().
 *
 * The removed line called rb_check_frozen(ary) unconditionally.  The added
 * lines call it only when RB_OBJ_FROZEN_RAW(ary) is true.  ary_verify(ary)
 * runs afterwards in both versions.
 */
static inline void
522522
rb_ary_modify_check(VALUE ary)
523523
{
524-
rb_check_frozen(ary);
524+
if (RB_OBJ_FROZEN_RAW(ary)) {
525+
rb_check_frozen(ary);
526+
}
525527
ary_verify(ary);
526528
}
527529

@@ -573,6 +575,200 @@ rb_ary_modify(VALUE ary)
573575
rb_ary_cancel_sharing(ary);
574576
}
575577

578+
579+
/*
 * Storage layout of an array as tracked by unboxed_ary.  Each enumerator's
 * value is the RArray flag bit that selects that layout, so a masked read of
 * the object's flags can be cast to this type directly.
 */
typedef enum ary_type {
580+
ary_heap = 0, /* neither the shared nor the embed flag is set */
581+
ary_shared = RARRAY_SHARED_FLAG,
582+
ary_embed = RARRAY_EMBED_FLAG,
583+
} ary_type;
584+
585+
/* The two RArray flag bits whose combination yields an ary_type value. */
#define UARY_TYPE_MASK (RARRAY_SHARED_FLAG | RARRAY_EMBED_FLAG)
586+
587+
/*
 * Unpacked ("normalized") view of an array: its layout, length, capacity and
 * element pointer held in plain fields next to the object they describe.
 */
typedef struct unboxed_ary {
588+
VALUE ary; /* the array object this view mirrors */
589+
ary_type type; /* layout: heap, shared or embedded */
590+
long len; /* number of elements */
591+
long capa; /* capacity recorded for the current layout */
592+
const VALUE *ptr; /* pointer to the first element */
593+
} unboxed_ary;
594+
595+
/* Run ary_verify() on the array object referenced by an unboxed_ary pointer. */
#define uary_verify(uary) ary_verify((uary)->ary)
596+
597+
static inline void
598+
unbox_ary(VALUE ary, unboxed_ary *uary)
599+
{
600+
RUBY_ASSERT(RB_TYPE_P(ary, T_ARRAY));
601+
ary_verify(ary);
602+
603+
uary->ary = ary;
604+
uary->type = (ary_type)(FL_TEST_RAW(ary, UARY_TYPE_MASK));
605+
606+
switch (uary->type) {
607+
case ary_heap:
608+
uary->len = ARY_HEAP_LEN(ary);
609+
uary->capa = ARY_SHARED_ROOT_P(ary) ? uary->len : ARY_HEAP_CAPA(ary);
610+
uary->ptr = RARRAY(ary)->as.heap.ptr;
611+
break;
612+
case ary_shared:
613+
uary->len = ARY_HEAP_LEN(ary);
614+
uary->capa = ARY_HEAP_LEN(ary);
615+
uary->ptr = RARRAY(ary)->as.heap.ptr;
616+
break;
617+
case ary_embed:
618+
uary->len = ARY_EMBED_LEN(ary);
619+
uary->capa = ary_embed_capa(ary);
620+
uary->ptr = RARRAY(ary)->as.ary;
621+
break;
622+
}
623+
}
624+
625+
static void
626+
uary_spill(unboxed_ary *uary, VALUE *ptr, long capa)
627+
{
628+
RUBY_ASSERT(uary->type == ary_embed);
629+
630+
MEMCPY(ptr, uary->ptr, VALUE, uary->len);
631+
uary->type = ary_heap;
632+
uary->ptr = ptr;
633+
uary->capa = capa;
634+
FL_UNSET_EMBED(uary->ary);
635+
ARY_SET_PTR(uary->ary, ptr);
636+
ARY_SET_HEAP_LEN(uary->ary, uary->len);
637+
ARY_SET_CAPA(uary->ary, uary->capa);
638+
}
639+
640+
static void
641+
uary_set_len(unboxed_ary *uary, long len)
642+
{
643+
switch (uary->type) {
644+
case ary_heap:
645+
case ary_shared:
646+
ARY_SET_HEAP_LEN(uary->ary, len);
647+
break;
648+
case ary_embed:
649+
ARY_SET_EMBED_LEN(uary->ary, len);
650+
break;
651+
}
652+
uary->len = len;
653+
}
654+
655+
static void
656+
uary_reembed(unboxed_ary *uary, long capa)
657+
{
658+
if (uary->len > capa) uary->len = capa;
659+
660+
MEMCPY((VALUE *)RARRAY(uary->ary)->as.ary, uary->ptr, VALUE, uary->len);
661+
ary_heap_free_ptr(uary->ary, uary->ptr, uary->capa);
662+
663+
uary->type = ary_embed;
664+
FL_SET_EMBED(uary->ary);
665+
ARY_SET_EMBED_LEN(uary->ary, uary->len);
666+
}
667+
668+
static size_t
669+
uary_realloc(unboxed_ary *uary, long capa)
670+
{
671+
ARY_SET_CAPA(uary->ary, capa);
672+
return uary->capa = ary_heap_realloc(uary->ary, capa);
673+
}
674+
675+
static void
676+
uary_resize_capa(unboxed_ary *uary, long capacity)
677+
{
678+
RUBY_ASSERT(uary->len <= capacity);
679+
RUBY_ASSERT(!OBJ_FROZEN(uary->ary));
680+
RUBY_ASSERT(uary->type != ary_shared);
681+
682+
if (capacity > ary_embed_capa(uary->ary)) {
683+
if (uary->type == ary_embed) {
684+
uary_spill(uary, ary_heap_alloc_buffer(capacity), capacity);
685+
}
686+
else {
687+
uary_realloc(uary, capacity);
688+
}
689+
}
690+
else {
691+
if (uary->type != ary_embed) {
692+
uary_reembed(uary, capacity);
693+
}
694+
}
695+
696+
ary_verify(ary);
697+
}
698+
699+
static void
700+
uary_double_capa(unboxed_ary *uary, long min)
701+
{
702+
long new_capa = uary->capa / 2;
703+
704+
if (new_capa < ARY_DEFAULT_SIZE) {
705+
new_capa = ARY_DEFAULT_SIZE;
706+
}
707+
if (new_capa >= ARY_MAX_SIZE - min) {
708+
new_capa = (ARY_MAX_SIZE - min) / 2;
709+
}
710+
new_capa += min;
711+
uary_resize_capa(uary, new_capa);
712+
713+
uary_verify(uary);
714+
}
715+
716+
static void
717+
uary_modify(unboxed_ary *uary)
718+
{
719+
rb_ary_modify(uary->ary);
720+
if (uary->type == ary_shared) {
721+
// TODO: Ideally we'd have a `uary_modify`
722+
unbox_ary(uary->ary, uary);
723+
}
724+
}
725+
726+
static VALUE
727+
uary_ensure_room_for_push(unboxed_ary *uary, long add_len)
728+
{
729+
long new_len = uary->len + add_len;
730+
731+
if (UNLIKELY(uary->len > ARY_MAX_SIZE - add_len)) {
732+
rb_raise(rb_eIndexError, "index %ld too big", new_len);
733+
}
734+
735+
if (UNLIKELY(uary->type == ary_shared)) {
736+
if (new_len > uary->capa) {
737+
VALUE shared_root = ARY_SHARED_ROOT(uary->ary);
738+
if (ARY_SHARED_ROOT_OCCUPIED(shared_root)) {
739+
if (uary->ptr - RARRAY_CONST_PTR(shared_root) + new_len <= RARRAY_LEN(shared_root)) {
740+
rb_ary_modify_check(uary->ary);
741+
742+
ary_verify(uary->ary);
743+
ary_verify(shared_root);
744+
return shared_root;
745+
}
746+
else {
747+
/* if array is shared, then it is likely it participate in push/shift pattern */
748+
rb_ary_modify(uary->ary);
749+
if (new_len > uary->capa - (uary->capa >> 6)) {
750+
uary_double_capa(uary, new_len);
751+
}
752+
ary_verify(uary->ary);
753+
return uary->ary;
754+
}
755+
}
756+
}
757+
uary_verify(uary);
758+
uary_modify(uary);
759+
}
760+
else {
761+
rb_ary_modify_check(uary->ary);
762+
}
763+
764+
if (UNLIKELY(new_len > uary->capa)) {
765+
uary_double_capa(uary, new_len);
766+
}
767+
768+
uary_verify(uary);
769+
return uary->ary;
770+
}
771+
576772
static VALUE
577773
ary_ensure_room_for_push(VALUE ary, long add_len)
578774
{
@@ -1377,23 +1573,28 @@ ary_take_first_or_last(int argc, const VALUE *argv, VALUE ary, enum ary_take_pos
13771573
/*
 * Diff view of rb_ary_push(): append `item` to `ary` and return `ary`.
 *
 * Removed lines read the length with RARRAY_LEN(), reserve room with
 * ary_ensure_room_for_push() and store the new length with ARY_SET_LEN().
 * Added lines unbox the array once into a local unboxed_ary and use
 * uary.len, uary_ensure_room_for_push() and uary_set_len() instead.
 * The element store through RARRAY_PTR_USE / RB_OBJ_WRITE is unchanged.
 */
VALUE
13781574
rb_ary_push(VALUE ary, VALUE item)
13791575
{
1380-
long idx = RARRAY_LEN((ary_verify(ary), ary));
1381-
VALUE target_ary = ary_ensure_room_for_push(ary, 1);
1576+
unboxed_ary uary;
1577+
unbox_ary(ary, &uary);
1578+
1579+
long idx = uary.len;
1580+
VALUE target_ary = uary_ensure_room_for_push(&uary, 1);
13821581
RARRAY_PTR_USE(ary, ptr, {
13831582
RB_OBJ_WRITE(target_ary, &ptr[idx], item);
13841583
});
1385-
ARY_SET_LEN(ary, idx + 1);
1386-
ary_verify(ary);
1584+
uary_set_len(&uary, idx + 1);
1585+
uary_verify(&uary);
13871586
return ary;
13881587
}
13891588

13901589
/*
 * Diff view of rb_ary_cat(): append the `len` values at `argv` to `ary` and
 * return `ary`.
 *
 * Removed lines read the old length with RARRAY_LEN(), reserve room with
 * ary_ensure_room_for_push() and store the new length with ARY_SET_LEN().
 * Added lines unbox the array once and use uary.len,
 * uary_ensure_room_for_push() and uary_set_len() instead.  Both versions
 * copy the values with ary_memcpy0().
 */
VALUE
13911590
rb_ary_cat(VALUE ary, const VALUE *argv, long len)
13921591
{
1393-
long oldlen = RARRAY_LEN(ary);
1394-
VALUE target_ary = ary_ensure_room_for_push(ary, len);
1395-
ary_memcpy0(ary, oldlen, len, argv, target_ary);
1396-
ARY_SET_LEN(ary, oldlen + len);
1592+
unboxed_ary uary;
1593+
unbox_ary(ary, &uary);
1594+
1595+
VALUE target_ary = uary_ensure_room_for_push(&uary, len);
1596+
ary_memcpy0(ary, uary.len, len, argv, target_ary);
1597+
uary_set_len(&uary, uary.len + len);
13971598
return ary;
13981599
}
13991600

0 commit comments

Comments
 (0)