Skip to content

Commit ef6ed33

Browse files
committed
Storable: use utf8_to_bytes_overwrite() if available
This is simpler and saves a malloc each time. Note that this code could use plain utf8_to_bytes() on older perls, but it is less convenient, so would require more code; I don't think the performance gain is worth it.
1 parent 87191dc commit ef6ed33

File tree

2 files changed

+36
-3
lines changed

2 files changed

+36
-3
lines changed

dist/Storable/Storable.xs

+35-2
Original file line numberDiff line numberDiff line change
@@ -2960,6 +2960,19 @@ static int store_hash(pTHX_ stcxt_t *cxt, HV *hv)
29602960
keyval = SvPV(key, keylen_tmp);
29612961
keylen = keylen_tmp;
29622962
if (SvUTF8(key)) {
2963+
2964+
#ifdef utf8_to_bytes_overwrite
2965+
2966+
/* If we are able to downgrade here; that means that we have a
2967+
* key which only had chars 0-255, but was utf8 encoded. */
2968+
if (utf8_to_bytes_overwrite( (U8**) &keyval, &keylen_tmp)) {
2969+
keylen = keylen_tmp;
2970+
flags |= SHV_K_WASUTF8;
2971+
}
2972+
else {
2973+
flags |= SHV_K_UTF8;
2974+
}
2975+
#else
29632976
const char *keysave = keyval;
29642977
bool is_utf8 = TRUE;
29652978

@@ -2982,6 +2995,7 @@ static int store_hash(pTHX_ stcxt_t *cxt, HV *hv)
29822995
to assign back to keylen. */
29832996
flags |= SHV_K_UTF8;
29842997
}
2998+
#endif
29852999
}
29863000

29873001
if (flagged_hash) {
@@ -3000,8 +3014,12 @@ static int store_hash(pTHX_ stcxt_t *cxt, HV *hv)
30003014
WLEN(keylen);
30013015
if (keylen)
30023016
WRITE(keyval, keylen);
3017+
3018+
#ifndef utf8_to_bytes_overwrite
3019+
30033020
if (flags & SHV_K_WASUTF8)
30043021
Safefree (keyval);
3022+
#endif
30053023
}
30063024

30073025
/*
@@ -7431,13 +7449,26 @@ static SV *do_retrieve(
74317449
if (SvUTF8(in)) {
74327450
STRLEN length;
74337451
const char *orig = SvPV(in, length);
7434-
char *asbytes;
74357452
/* This is quite deliberate. I want the UTF8 routines
74367453
to encounter the '\0' which perl adds at the end
74377454
of all scalars, so that any new string also has
74387455
this.
74397456
*/
74407457
STRLEN klen_tmp = length + 1;
7458+
7459+
# define CROAK_TO_BYTES_TEXT \
7460+
"Frozen string corrupt - contains characters outside 0-255"
7461+
# ifdef utf8_to_bytes_overwrite
7462+
7463+
if (! utf8_to_bytes_overwrite( (U8**) &orig, &klen_tmp)) {
7464+
CROAK((CROAK_TO_BYTES_TEXT));
7465+
}
7466+
7467+
SvLEN_set(in, klen_tmp);
7468+
SvCUR_set(in, klen_tmp - 1);
7469+
7470+
# else
7471+
char *asbytes;
74417472
bool is_utf8 = TRUE;
74427473

74437474
/* Just casting the &klen to (STRLEN) won't work
@@ -7447,7 +7478,7 @@ static SV *do_retrieve(
74477478
&klen_tmp,
74487479
&is_utf8);
74497480
if (is_utf8) {
7450-
CROAK(("Frozen string corrupt - contains characters outside 0-255"));
7481+
CROAK((CROAK_TO_BYTES_TEXT));
74517482
}
74527483
if (asbytes != orig) {
74537484
/* String has been converted.
@@ -7462,6 +7493,8 @@ static SV *do_retrieve(
74627493
SvLEN_set(in, klen_tmp);
74637494
SvCUR_set(in, klen_tmp - 1);
74647495
}
7496+
# endif
7497+
74657498
}
74667499
#endif
74677500
MBUF_SAVE_AND_LOAD(in);

dist/Storable/lib/Storable.pm

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ our @EXPORT_OK = qw(
3030
our ($canonical, $forgive_me);
3131

3232
BEGIN {
33-
our $VERSION = '3.35';
33+
our $VERSION = '3.36';
3434
}
3535

3636
our $recursion_limit;

0 commit comments

Comments
 (0)