@@ -51,6 +51,21 @@ def v2_dtypes() -> st.SearchStrategy[np.dtype]:
51
51
)
52
52
53
53
54
def safe_unicode_for_dtype(dtype: np.dtype[np.str_]) -> st.SearchStrategy[str]:
    """Return a strategy of non-empty strings that fit within ``dtype``.

    The maximum string length is derived from ``dtype.itemsize`` assuming
    UTF-32 storage (4 bytes per character), with a floor of one character.
    Surrogate code points (Unicode category "Cs") and code points below 32
    are excluded from the alphabet so the generated text is UTF-8-safe.
    """
    # Even though surrogates are *technically allowed*, leave them out,
    # together with everything below code point 32.
    allowed_chars = st.characters(
        blacklist_categories=["Cs"],
        min_codepoint=32,
    )

    # itemsize is in bytes; utf-32 uses 4 bytes/character. Never go below 1
    # so that max_size stays >= min_size.
    char_capacity = max(dtype.itemsize // 4, 1)

    return st.text(alphabet=allowed_chars, min_size=1, max_size=char_capacity)
67
+
68
+
54
69
# From https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#node-names
55
70
# 1. must not be the empty string ("")
56
71
# 2. must not include the character "/"
@@ -86,7 +101,12 @@ def numpy_arrays(
86
101
Generate numpy arrays that can be saved in the provided Zarr format.
87
102
"""
88
103
zarr_format = draw (zarr_formats )
89
- return draw (npst .arrays (dtype = v3_dtypes () if zarr_format == 3 else v2_dtypes (), shape = shapes ))
104
+ dtype = draw (v3_dtypes () if zarr_format == 3 else v2_dtypes ())
105
+ if np .issubdtype (dtype , np .str_ ):
106
+ safe_unicode_strings = safe_unicode_for_dtype (dtype )
107
+ return draw (npst .arrays (dtype = dtype , shape = shapes , elements = safe_unicode_strings ))
108
+
109
+ return draw (npst .arrays (dtype = dtype , shape = shapes ))
90
110
91
111
92
112
@st .composite # type: ignore[misc]
0 commit comments