From b3798f79c61632ce0aac5d86956d9fb17006a136 Mon Sep 17 00:00:00 2001 From: Xenia Lu Date: Fri, 10 Oct 2025 13:19:06 +0800 Subject: [PATCH 1/5] feat: add readonly StringInfo for moontest --- Makefile | 4 ++- meson.build | 3 ++ src/rustica/adt/stringbuilder.c | 57 +++++++++++++++++++++++++++------ src/rustica/datatypes.c | 17 ++++++++++ src/rustica/datatypes.h | 4 +++ src/rustica/main.c | 3 ++ src/rustica/moontest.c | 18 ++++------- src/rustica/wamr.c | 17 ---------- src/rustica/wamr.h | 4 --- subprojects/wamr.wrap | 6 +++- 10 files changed, 90 insertions(+), 43 deletions(-) diff --git a/Makefile b/Makefile index 5dbb790..121558b 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,9 @@ DEV_LLVM_DIR = /usr/lib/llvm18 # Default target: build for development .PHONY: build -build: $(BUILD_DIR)/install +build: + touch $(BUILD_DIR)/build/.stamp + $(MAKE) $(BUILD_DIR)/install/.stamp $(BUILD_DIR)/build/.stamp: uv run meson.py setup $(BUILD_DIR)/build --prefix=/ -Dllvm_dir=$(DEV_LLVM_DIR) diff --git a/meson.build b/meson.build index a5effeb..acb71c1 100644 --- a/meson.build +++ b/meson.build @@ -270,6 +270,9 @@ uncommon_shared_lib = shared_library('uncommon_shared', rustica_shared = files( 'src/rustica/env.c', + 'src/rustica/datatypes.c', + 'src/rustica/adt/text.c', + 'src/rustica/adt/stringbuilder.c', ) rustica_deps = [vmlib, pg.get_variable('uuid')] rustica_cargs = wamr.get_cmake_definitions('-DWASM') diff --git a/src/rustica/adt/stringbuilder.c b/src/rustica/adt/stringbuilder.c index 2bb1820..6c73c23 100644 --- a/src/rustica/adt/stringbuilder.c +++ b/src/rustica/adt/stringbuilder.c @@ -23,21 +23,40 @@ sb_new(wasm_exec_env_t exec_env, int32_t size_hint) { return rst_externref_of_obj(exec_env, obj); } +static wasm_externref_obj_t +sb_read_text(wasm_exec_env_t exec_env, wasm_obj_t ref) { + Datum txt_datum = wasm_externref_obj_get_datum(ref, TEXTOID); + text *txt = DatumGetTextPP(txt_datum); + char *start = VARDATA_ANY(txt); + obj_t obj = rst_obj_new(exec_env, OBJ_STRING_INFO, ref, sizeof(StringInfoData)); + if (txt_datum != PointerGetDatum(txt)) + obj->flags |= OBJ_OWNS_BODY_MEMBERS; + obj->body.sb->data = (char *)txt; + obj->body.sb->cursor = start - (char *)txt; + obj->body.sb->len = VARSIZE_ANY_EXHDR(txt) + obj->body.sb->cursor; + obj->body.sb->maxlen = 0; // read-only + return rst_externref_of_obj(exec_env, obj); +} + static inline StringInfo -sb_ensure_string_info(wasm_obj_t refobj) { +sb_ensure_string_info(wasm_obj_t refobj, bool readonly) { obj_t obj = wasm_externref_obj_get_obj(refobj, OBJ_STRING_INFO); + if (!readonly && obj->body.sb->maxlen == 0) + ereport(ERROR, errmsg("StringInfo is read-only")); + if (readonly && obj->body.sb->maxlen != 0) + ereport(ERROR, errmsg("StringInfo is read-write")); return obj->body.sb; } static int32_t sb_mblength(wasm_exec_env_t exec_env, wasm_obj_t refobj) { - StringInfo sb = sb_ensure_string_info(refobj); + StringInfo sb = sb_ensure_string_info(refobj, false); return sb->len; } static int32_t sb_write_string(wasm_exec_env_t exec_env, wasm_obj_t refobj, wasm_obj_t str) { - StringInfo sb = sb_ensure_string_info(refobj); + StringInfo sb = sb_ensure_string_info(refobj, false); Datum jsstr = wasm_externref_obj_get_datum(str, TEXTOID); text *txt = DatumGetTextPP(jsstr); appendBinaryStringInfoNT(sb, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt)); @@ -52,7 +71,7 @@ sb_write_substring(wasm_exec_env_t exec_env, wasm_obj_t str, int32_t start, int32_t len) { - StringInfo sb = sb_ensure_string_info(refobj); + StringInfo sb = sb_ensure_string_info(refobj, false); Datum jsstr = wasm_externref_obj_get_datum(str, TEXTOID); text *txt = DatumGetTextPP(jsstr); char *data = VARDATA_ANY(txt); @@ -69,7 +88,7 @@ sb_write_substring(wasm_exec_env_t exec_env, static int32_t sb_write_char(wasm_exec_env_t exec_env, wasm_obj_t refobj, int32_t ch) { - StringInfo sb = sb_ensure_string_info(refobj); + StringInfo sb = sb_ensure_string_info(refobj, false); enlargeStringInfo(sb, MAX_UNICODE_EQUIVALENT_STRING); pg_unicode_to_server(ch, (unsigned char *)sb->data + sb->len); sb->len += (int)strlen(sb->data + sb->len); @@ -82,7 +101,7 @@ sb_write_bytes(wasm_exec_env_t exec_env, wasm_obj_t bytes, int32_t start, int32_t len) { - StringInfo sb = sb_ensure_string_info(refobj); + StringInfo sb = sb_ensure_string_info(refobj, false); bytea *b = DatumGetByteaP(wasm_externref_obj_get_datum(bytes, BYTEAOID)); if (start < 0 || start + len > VARSIZE_ANY_EXHDR(b)) ereport(ERROR, errmsg("sb_write_bytes: index out of bound")); @@ -92,7 +111,7 @@ sb_write_bytes(wasm_exec_env_t exec_env, static int32_t sb_write_byte(wasm_exec_env_t exec_env, wasm_obj_t refobj, int32_t byte) { - StringInfo sb = sb_ensure_string_info(refobj); + StringInfo sb = sb_ensure_string_info(refobj, false); enlargeStringInfo(sb, 1); sb->data[sb->len++] = (char)byte; return 0; @@ -100,18 +119,37 @@ sb_write_byte(wasm_exec_env_t exec_env, wasm_obj_t refobj, int32_t byte) { static wasm_externref_obj_t sb_to_string(wasm_exec_env_t exec_env, wasm_obj_t refobj) { - StringInfo sb = sb_ensure_string_info(refobj); + StringInfo sb = sb_ensure_string_info(refobj, false); return cstring_into_varatt_obj(exec_env, sb->data, sb->len, TEXTOID); } static wasm_externref_obj_t sb_to_bytes(wasm_exec_env_t exec_env, wasm_obj_t refobj) { - StringInfo sb = sb_ensure_string_info(refobj); + StringInfo sb = sb_ensure_string_info(refobj, false); return cstring_into_varatt_obj(exec_env, sb->data, sb->len, BYTEAOID); } +static int32_t +sb_read_char(wasm_exec_env_t exec_env, wasm_obj_t refobj) { + int ch; + pg_wchar rv[2] = { '?', 0 }; + StringInfo sb = sb_ensure_string_info(refobj, true); + if (sb->cursor >= sb->len) + return -1; // EOF + ch = pg_mblen(sb->data + sb->cursor); + if (ch > 0 && sb->cursor + ch <= sb->len) { + pg_mb2wchar_with_len(sb->data + sb->cursor, rv, ch); + sb->cursor += ch; + } else { + // Invalid UTF-8 sequence, skip one byte + sb->cursor += 1; + } + return (int32_t)rv[0]; +} + static NativeSymbol sb_symbols[] = { { "sb_new", sb_new, "(i)r" }, + { "si_read_text", sb_read_text, "(r)r" }, { "sb_mblength", sb_mblength, "(r)i" }, { "sb_write_string", sb_write_string, "(rr)i" }, { "sb_write_substring", sb_write_substring, "(rrii)i" }, @@ -120,6 +158,7 @@ static NativeSymbol sb_symbols[] = { { "sb_write_byte", sb_write_byte, "(ri)i" }, { "sb_to_string", sb_to_string, "(r)r" }, { "sb_to_bytes", sb_to_bytes, "(r)r" }, + { "si_read_char", sb_read_char, "(r)i" }, }; void diff --git a/src/rustica/datatypes.c b/src/rustica/datatypes.c index 9a7df4f..53ddbd0 100644 --- a/src/rustica/datatypes.c +++ b/src/rustica/datatypes.c @@ -195,3 +195,20 @@ cstring_into_varatt_obj(wasm_exec_env_t exec_env, memcpy(VARDATA_ANY(obj->body.ptr), data, len); return rst_externref_of_obj(exec_env, obj); } + +void +wasm_runtime_remove_local_obj_ref(wasm_exec_env_t exec_env, + wasm_local_obj_ref_t *me) { + wasm_local_obj_ref_t *current = + wasm_runtime_get_cur_local_obj_ref(exec_env); + if (current == me) + wasm_runtime_pop_local_obj_ref(exec_env); + else { + wasm_local_obj_ref_t *next; + while (current != me) { + next = current; + current = current->prev; + } + next->prev = me->prev; + } +} diff --git a/src/rustica/datatypes.h b/src/rustica/datatypes.h index 4e59536..1961f9a 100644 --- a/src/rustica/datatypes.h +++ b/src/rustica/datatypes.h @@ -127,4 +127,8 @@ rst_register_natives_uuid(); void rst_init_context_for_jsonb(wasm_exec_env_t exec_env); +void +wasm_runtime_remove_local_obj_ref(wasm_exec_env_t exec_env, + wasm_local_obj_ref_t *me); + #endif /* RUSTICA_DATATYPES_H */ diff --git a/src/rustica/main.c b/src/rustica/main.c index 6b2325f..e7f801c 100644 --- a/src/rustica/main.c +++ b/src/rustica/main.c @@ -11,6 +11,7 @@ #include "gc_export.h" #include "aot_export.h" +#include "rustica/datatypes.h" #include "rustica/env.h" #include "rustica/moontest.h" @@ -208,6 +209,8 @@ init_wamr() { ereport(ERROR, errmsg("Failed to initialize WASM runtime")); rustica_register_natives(); + rst_register_natives_text(); + rst_register_natives_stringbuilder(); } static inline uint8_t * diff --git a/src/rustica/moontest.c b/src/rustica/moontest.c index d122f40..2a34473 100644 --- a/src/rustica/moontest.c +++ b/src/rustica/moontest.c @@ -9,6 +9,7 @@ #include "utils/json.h" #include "rustica/env.h" +#include "rustica/datatypes.h" #include "rustica/moontest.h" extern const char *progname; @@ -179,8 +180,7 @@ static JsonParseErrorType json_object_end_cb(void *state) { JsonParseState *parse_state = state; wasm_function_inst_t func; - rustica_value_t filename = NULL; - uintptr_t filename_ref = 0; + wasm_externref_obj_t filename = NULL; StringInfo filename_buf = NULL, msg = NULL, escaped_msg = NULL; const char *exc; @@ -194,19 +194,15 @@ json_object_end_cb(void *state) { for (int i = parse_state->range_start; i < parse_state->range_end; i++) { wasm_val_t args[2]; + if (!filename) { Assert(parse_state->filename != NULL); - filename = rustica_value_new(RUSTICA_ENV_CSTRING, - parse_state->filename, - 0); + filename = cstring_into_varatt_obj(parse_state->exec_env, + parse_state->filename, strlen(parse_state->filename), TEXTOID); } - if (!filename_ref) - filename_ref = - (uintptr_t)rustica_value_to_wasm(parse_state->exec_env, - filename); args[0].kind = WASM_EXTERNREF; - args[0].of.foreign = filename_ref; + args[0].of.foreign = (uintptr_t)filename; args[1].kind = WASM_I32; args[1].of.i32 = i; @@ -218,7 +214,7 @@ json_object_end_cb(void *state) { args)) { if (!filename_buf) { filename_buf = makeStringInfo(); - escape_json(filename_buf, filename->ptr); + escape_json(filename_buf, parse_state->filename); } if (msg) { resetStringInfo(msg); diff --git a/src/rustica/wamr.c b/src/rustica/wamr.c index c92f15e..68fd841 100644 --- a/src/rustica/wamr.c +++ b/src/rustica/wamr.c @@ -230,20 +230,3 @@ wasm_runtime_unregister_and_unload(wasm_module_t module) { if (module->module_type == Wasm_Module_AoT) aot_unload((AOTModule *)module); } - -void -wasm_runtime_remove_local_obj_ref(wasm_exec_env_t exec_env, - wasm_local_obj_ref_t *me) { - wasm_local_obj_ref_t *current = - wasm_runtime_get_cur_local_obj_ref(exec_env); - if (current == me) - wasm_runtime_pop_local_obj_ref(exec_env); - else { - wasm_local_obj_ref_t *next; - while (current != me) { - next = current; - current = current->prev; - } - next->prev = me->prev; - } -} diff --git a/src/rustica/wamr.h b/src/rustica/wamr.h index 55a5dba..a965cec 100644 --- a/src/rustica/wamr.h +++ b/src/rustica/wamr.h @@ -51,10 +51,6 @@ wasm_ref_type_repr(CommonHeapTypes *heap_types, wasm_ref_type_t ref_type); void wasm_runtime_unregister_and_unload(wasm_module_t module); -void -wasm_runtime_remove_local_obj_ref(wasm_exec_env_t exec_env, - wasm_local_obj_ref_t *me); - int32_t env_ereport(wasm_exec_env_t exec_env, int32_t level, wasm_obj_t ref); diff --git a/subprojects/wamr.wrap b/subprojects/wamr.wrap index 3ceccd5..d2beb74 100644 --- a/subprojects/wamr.wrap +++ b/subprojects/wamr.wrap @@ -3,5 +3,9 @@ directory = wasm-micro-runtime-WAMR-2.4.2 source_url = https://github.com/bytecodealliance/wasm-micro-runtime/archive/refs/tags/WAMR-2.4.2.tar.gz source_filename = wamr-2.4.2.tar.gz source_hash = 73380561a01f4863506e855c2c265cf03c5b6efb17bbb8c9bbafe80745fd00ef -diff_files = wamr/0001.patch, wamr/0002.patch, wamr/0004.patch method = cmake +diff_files = + wamr/0001.patch, + wamr/0002.patch, + wamr/0004.patch, + wamr/0005-Support-custom-global-resolver.patch From 613d87a18264240e86c43bceb66afef5da159edc Mon Sep 17 00:00:00 2001 From: Xenia Lu Date: Fri, 17 Oct 2025 17:07:51 +0800 Subject: [PATCH 2/5] feat: support js-string spec * Fixed static externref GC issue * Fixed NUL in WASM string issue --- meson.build | 8 +- src/rustica/adt/text.c | 214 +++++++++- src/rustica/datatypes.c | 2 +- src/rustica/main.c | 8 + ...-false-OOB-in-array.fill-for-interp.patch} | 4 +- ...block-loop-ref-params-type-checking.patch} | 4 +- ...-fix-typo-in-AOT-stack-dump-with-GC.patch} | 4 +- ...0004-Add-basic-SUPPORT_NUL_IN_STRING.patch | 388 ++++++++++++++++++ .../0005-Support-custom-global-resolver.patch | 87 ++-- .../wamr/0006-static-externref.patch | 55 +++ subprojects/wamr.wrap | 10 +- 11 files changed, 722 insertions(+), 62 deletions(-) rename subprojects/packagefiles/wamr/{0001.patch => 0001-fix-false-OOB-in-array.fill-for-interp.patch} (93%) rename subprojects/packagefiles/wamr/{0002.patch => 0002-loader-fix-block-loop-ref-params-type-checking.patch} (94%) rename subprojects/packagefiles/wamr/{0004.patch => 0003-fix-typo-in-AOT-stack-dump-with-GC.patch} (86%) create mode 100644 subprojects/packagefiles/wamr/0004-Add-basic-SUPPORT_NUL_IN_STRING.patch create mode 100644 subprojects/packagefiles/wamr/0006-static-externref.patch diff --git a/meson.build b/meson.build index acb71c1..e4db339 100644 --- a/meson.build +++ b/meson.build @@ -219,7 +219,11 @@ postgres_pure_lib = static_library('postgres_lib', cc = meson.get_compiler('c') llvm_inc_flag = '-I' + (get_option('llvm_dir') / 'include') -wamr_c_flags = ['-Wno-incompatible-pointer-types', llvm_inc_flag] +wamr_c_flags = [ + '-DWASM_SUPPORT_NUL_IN_STRING=1', + '-Wno-incompatible-pointer-types', + llvm_inc_flag, +] # https://github.com/bytecodealliance/wasm-micro-runtime/issues/4640 if cc.has_argument('-fzero-init-padding-bits=unions') wamr_c_flags += ['-fzero-init-padding-bits=unions'] @@ -304,7 +308,7 @@ executable('rustica-engine', pg_mod_args = pg.get_variable('pg_mod_args') shared_module( - 'rustica-engine', + 'rustica-engine.so', 'src/rustica/extension.c', sources: rustica_shared, include_directories: 'src', diff --git a/src/rustica/adt/text.c b/src/rustica/adt/text.c index 58fb5b1..17099a6 100644 --- a/src/rustica/adt/text.c +++ b/src/rustica/adt/text.c @@ -8,6 +8,7 @@ #include "catalog/pg_collation_d.h" #include "catalog/pg_type_d.h" #include "mb/pg_wchar.h" +#include "utils/builtins.h" #include "utils/fmgrprotos.h" #include "wasm_runtime_common.h" @@ -101,24 +102,19 @@ rst_text_substr(wasm_exec_env_t exec_env, static WASMValue * global_text_resolver(const char *utf8str, + uint32_t utf8len, WASMRefType *ref_type, uint8 val_type, uint8 is_mutable) { // Convert the WASM string from UTF-8 to server encoding - size_t llen = strlen(utf8str); + size_t llen = utf8len; if (llen > VARATT_MAX - VARHDRSZ) ereport(ERROR, errmsg("global text too long")); int len = (int)llen; - char *str = pg_any_to_server(utf8str, len, PG_UTF8); - if (str != utf8str) { - llen = strlen(str); - if (llen > VARATT_MAX - VARHDRSZ) { - pfree(str); - ereport(ERROR, - errmsg("global text too long after encoding conversion")); - } - len = (int)llen; + if (GetDatabaseEncoding() != PG_UTF8) { + ereport(ERROR, errmsg("TODO")); } + char *str = utf8str; bool is_short = len <= VARATT_SHORT_MAX - VARHDRSZ_SHORT; // Create a static object with the Datum embedded @@ -143,6 +139,189 @@ global_text_resolver(const char *utf8str, return rv; } +static int32_t +utf16_count_code_units(const char *mbstr, int size) { + int count = 0; + int mb_offset = 0; + while (mb_offset < size) { + int ch = pg_mblen(mbstr + mb_offset); + if (ch <= 0) + ch = 1; // Invalid UTF-8 sequence, treat as single byte + else if (mb_offset + ch > size) + break; // Incomplete character at end of string + if (ch < 4) + count += 1; // Normal character + else + count += 2; // Surrogate pair + mb_offset += ch; + } + return count; +} + +static int32_t +utf16_length(wasm_exec_env_t exec_env, wasm_obj_t obj) { + Datum str = wasm_externref_obj_get_datum(obj, TEXTOID); + text *t = DatumGetTextPP(str); + char *data = VARDATA_ANY(t); + int size = VARSIZE_ANY_EXHDR(t); + int count; + if (pg_database_encoding_max_length() == 1) { + count = size; + } else { + count = utf16_count_code_units(data, size); + } + RST_FREE_IF_COPY(t, str); + return count; +} + +static int32_t +utf16_char_code_at(wasm_exec_env_t exec_env, wasm_obj_t ref, int32_t index) { + Datum str = wasm_externref_obj_get_datum(ref, TEXTOID); + text *t = DatumGetTextPP(str); + char *data = VARDATA_ANY(t); + int size = VARSIZE_ANY_EXHDR(t); + pg_wchar rv[2] = { '?', 0 }; + + if (pg_database_encoding_max_length() == 1) { + if (index < 0) + index += size; + if (index < 0 || index >= size) + goto oob_error; + rv[0] = (unsigned char)data[index]; + } else { + int mb_offset = 0; + int utf16_index = 0; + int32_t utf16_size = utf16_count_code_units(data, size); + if (index < 0) + index += utf16_size; + if (index < 0 || index >= utf16_size) + goto oob_error; + // We're safe within utf16_size not to check for out-of-bounds + for (;;) { + int ch = pg_mblen(data + mb_offset); + if (ch <= 0) + ch = 1; // Invalid UTF-8 sequence, treat as single byte + if (ch < 4) + utf16_index += 1; + else + utf16_index += 2; + if (utf16_index > index) { + // The character at the given UTF-16 index starts here + pg_mb2wchar_with_len(data + mb_offset, rv, ch); + if (ch >= 4) { + if (utf16_index - 1 == index) + // We are in the second half of a surrogate pair + // Return the low surrogate code unit + rv[0] = 0xDC00 + (rv[0] & 0x3FF); + else + // We are in the first half of a surrogate pair + // Return the high surrogate code unit + rv[0] = 0xD800 + ((rv[0] - 0x10000) >> 10); + } + break; + } + mb_offset += ch; + } + } + RST_FREE_IF_COPY(t, str); + return (int32_t)rv[0]; + +oob_error: + RST_FREE_IF_COPY(t, str); + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), errmsg("index out of range"))); +} + +static wasm_externref_obj_t +utf16_from_char_code_array(wasm_exec_env_t exec_env, wasm_obj_t obj, int32_t start, int32_t length) { + wasm_array_obj_t arr = (wasm_array_obj_t)obj; + uint32_t size = wasm_array_obj_length(arr); + StringInfoData buf; + wasm_externref_obj_t rv; + const char* err = "fromCharCodeArray failed"; + + if (length < 0) + length = size; // To the end + if (start < 0) + start += size; + if (start < 0 || start > size) + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), errmsg("start index out of range"))); + if (start + length > size) + length = size - start; // Adjust to fit + if (length == 0) + return rst_externref_of_owned_datum(exec_env, CStringGetTextDatum(""), TEXTOID); + initStringInfo(&buf); + for (uint32_t i = start; i < size; i++) { + wasm_value_t value; + int32_t cu; + pg_wchar ch; + unsigned char utf8[4]; + + wasm_array_obj_get_elem(arr, i, false, &value); + cu = value.i32; + if (cu < 0 || cu > 0xFFFF) { + err = "Invalid UTF-16 code unit"; + goto error; + } + if (is_utf16_surrogate_first(cu)) { + // High surrogate, must be followed by a low surrogate + if (i + 1 < size) { + wasm_array_obj_get_elem(arr, i + 1, false, &value); + if (is_utf16_surrogate_second(value.i32)) { + ch = surrogate_pair_to_codepoint(cu, value.i32); + i++; // Consumed the low surrogate + } else { + err = "Invalid surrogate pair"; + goto error; + } + } else { + err = "Incomplete surrogate pair"; + goto error; + } + } else if (is_utf16_surrogate_second(cu)) { + err = "Unmatched low surrogate"; + goto error; + } else { + ch = cu; // Normal code unit + } + unicode_to_utf8(ch, utf8); + appendBinaryStringInfo(&buf, (char *)utf8, unicode_utf8len(ch)); + } + rv = cstring_into_varatt_obj(exec_env, buf.data, buf.len, TEXTOID); + pfree(buf.data); + return rv; + +error: + pfree(buf.data); + wasm_runtime_set_exception(wasm_runtime_get_module_inst(exec_env), err); + return NULL; +} + +static wasm_externref_obj_t +utf16_from_code_point(wasm_exec_env_t exec_env, int32_t code_point) { + unsigned char buf[5]; + if (code_point == 0) { + obj_t obj = rst_obj_new(exec_env, OBJ_DATUM, NULL, VARHDRSZ_SHORT + 1); + obj->oid = TEXTOID; + SET_VARSIZE_1B(obj->body.ptr, VARHDRSZ_SHORT + 1); + VARDATA_ANY(obj->body.ptr)[0] = '\0'; + return rst_externref_of_obj(exec_env, obj); + } + pg_unicode_to_server(code_point, buf); + return cstring_into_varatt_obj(exec_env, buf, strlen(buf), TEXTOID); +} + +static void +console_log(wasm_exec_env_t exec_env, wasm_obj_t obj) { + Datum str = wasm_externref_obj_get_datum(obj, TEXTOID); + text *t = DatumGetTextPP(str); + char *cstr = text_to_cstring(t); + RST_FREE_IF_COPY(t, str); + printf("%s\n", cstr); + pfree(cstr); +} + static NativeSymbol text_natives[] = { { "textlen", rst_textlen, "(r)i" }, { "textget", rst_textget, "(ri)i" }, @@ -151,9 +330,24 @@ static NativeSymbol text_natives[] = { { "text_substr", rst_text_substr, "(rii)r" }, }; +static NativeSymbol wasm_js_string_natives[] = { + { "length", utf16_length, "(r)i" }, + { "charCodeAt", utf16_char_code_at, "(ri)i" }, + { "fromCharCodeArray", utf16_from_char_code_array, "(rii)r" }, + { "equals", rst_texteq, "(rr)i" }, + { "concat", rst_textcat, "(rr)r" }, + { "fromCodePoint", utf16_from_code_point, "(i)r" }, +}; + +static NativeSymbol console_natives[] = { + { "log", console_log, "(r)" }, +}; + void rst_register_natives_text() { REGISTER_WASM_NATIVES("env", text_natives); + REGISTER_WASM_NATIVES("wasm:js-string", wasm_js_string_natives); + REGISTER_WASM_NATIVES("console", console_natives); if (!wasm_register_global_resolver("env:text", global_text_resolver)) ereport(ERROR, errmsg("cannot register global resolver for texts")); } diff --git a/src/rustica/datatypes.c b/src/rustica/datatypes.c index 53ddbd0..7a70256 100644 --- a/src/rustica/datatypes.c +++ b/src/rustica/datatypes.c @@ -131,7 +131,7 @@ rst_obj_new_static(ObjType type, obj_t *obj_out, size_t embed_size) { anyref->header = WASM_OBJ_ANYREF_OBJ_FLAG; anyref->host_obj = obj; - externref->header = WASM_OBJ_EXTERNREF_OBJ_FLAG; + externref->header = WASM_OBJ_EXTERNREF_OBJ_FLAG | WASM_OBJ_STATIC_OBJ_FLAG; externref->internal_obj = (WASMObjectRef)anyref; rv->gc_obj = (wasm_obj_t)externref; return rv; diff --git a/src/rustica/main.c b/src/rustica/main.c index e7f801c..497dadd 100644 --- a/src/rustica/main.c +++ b/src/rustica/main.c @@ -3,6 +3,8 @@ #include "postgres.h" #include "getopt_long.h" +#include "catalog/pg_collation_d.h" +#include "mb/pg_wchar.h" #include "utils/memutils.h" #include "utils/pg_locale.h" @@ -140,6 +142,12 @@ main(int argc, char *argv[]) { init_locale("LC_TIME", LC_TIME, "C"); unsetenv("LC_ALL"); + SetDatabaseEncoding(PG_UTF8); + + // Initialize default_locale with ICU collator + default_locale.provider = COLLPROVIDER_ICU; + default_locale.deterministic = true; + make_icu_collator("", NULL, &default_locale); PG_TRY(); { diff --git a/subprojects/packagefiles/wamr/0001.patch b/subprojects/packagefiles/wamr/0001-fix-false-OOB-in-array.fill-for-interp.patch similarity index 93% rename from subprojects/packagefiles/wamr/0001.patch rename to subprojects/packagefiles/wamr/0001-fix-false-OOB-in-array.fill-for-interp.patch index 94389f3..23dc16e 100644 --- a/subprojects/packagefiles/wamr/0001.patch +++ b/subprojects/packagefiles/wamr/0001-fix-false-OOB-in-array.fill-for-interp.patch @@ -1,7 +1,7 @@ -From af896e1c1de2bfd8add4b841fb820f13214761e7 Mon Sep 17 00:00:00 2001 +From 4d4ca727bef43db619dc3416ace14e9fb9e197a6 Mon Sep 17 00:00:00 2001 From: Xenia Lu Date: Fri, 3 Oct 2025 11:18:17 +0800 -Subject: [PATCH 1/4] fix: false OOB in array.fill for interp +Subject: [PATCH 1/5] fix: false OOB in array.fill for interp --- core/iwasm/interpreter/wasm_interp_classic.c | 2 +- diff --git a/subprojects/packagefiles/wamr/0002.patch b/subprojects/packagefiles/wamr/0002-loader-fix-block-loop-ref-params-type-checking.patch similarity index 94% rename from subprojects/packagefiles/wamr/0002.patch rename to subprojects/packagefiles/wamr/0002-loader-fix-block-loop-ref-params-type-checking.patch index fb63c86..a1ca14f 100644 --- a/subprojects/packagefiles/wamr/0002.patch +++ b/subprojects/packagefiles/wamr/0002-loader-fix-block-loop-ref-params-type-checking.patch @@ -1,7 +1,7 @@ -From a0999fbf49f7d89f789c809f3561817e98704728 Mon Sep 17 00:00:00 2001 +From b850e17394190ae032040d3ed55acf97c4ac6ee7 Mon Sep 17 00:00:00 2001 From: Xenia Lu Date: Wed, 24 Sep 2025 09:14:14 +0800 -Subject: [PATCH 2/4] loader: fix block/loop ref params type checking +Subject: [PATCH 2/5] loader: fix block/loop ref params type checking --- core/iwasm/interpreter/wasm_loader.c | 29 ++++++++++++++++++++++++++++ diff --git a/subprojects/packagefiles/wamr/0004.patch b/subprojects/packagefiles/wamr/0003-fix-typo-in-AOT-stack-dump-with-GC.patch similarity index 86% rename from subprojects/packagefiles/wamr/0004.patch rename to subprojects/packagefiles/wamr/0003-fix-typo-in-AOT-stack-dump-with-GC.patch index aa4ab47..0d4e1d0 100644 --- a/subprojects/packagefiles/wamr/0004.patch +++ b/subprojects/packagefiles/wamr/0003-fix-typo-in-AOT-stack-dump-with-GC.patch @@ -1,7 +1,7 @@ -From 0df937642752c70d4d67136554c17c4f1430282f Mon Sep 17 00:00:00 2001 +From 6e9f7f280c00d4dbb85b79ba92a678e2907c6e1d Mon Sep 17 00:00:00 2001 From: Xenia Lu Date: Sun, 5 Oct 2025 09:36:44 +0800 -Subject: [PATCH 4/4] fix: typo in AOT stack dump with GC +Subject: [PATCH 3/5] fix: typo in AOT stack dump with GC --- core/iwasm/aot/aot_runtime.c | 2 +- diff --git a/subprojects/packagefiles/wamr/0004-Add-basic-SUPPORT_NUL_IN_STRING.patch b/subprojects/packagefiles/wamr/0004-Add-basic-SUPPORT_NUL_IN_STRING.patch new file mode 100644 index 0000000..a5dea2f --- /dev/null +++ b/subprojects/packagefiles/wamr/0004-Add-basic-SUPPORT_NUL_IN_STRING.patch @@ -0,0 +1,388 @@ +From 7f1a30fa74f8a06fd28953f8a35a8e9cfaf47d79 Mon Sep 17 00:00:00 2001 +From: Xenia Lu +Date: Thu, 16 Oct 2025 10:56:58 +0800 +Subject: [PATCH 4/5] Add basic SUPPORT_NUL_IN_STRING + +--- + core/config.h | 4 ++ + core/iwasm/aot/aot_loader.c | 53 ++++++++++++++++------ + core/iwasm/aot/aot_runtime.c | 9 +++- + core/iwasm/compilation/aot.c | 3 ++ + core/iwasm/compilation/aot.h | 3 ++ + core/iwasm/compilation/aot_emit_aot_file.c | 13 ++++++ + core/iwasm/interpreter/wasm.h | 6 +++ + core/iwasm/interpreter/wasm_loader.c | 18 ++++++-- + core/iwasm/interpreter/wasm_runtime.c | 30 ++++++++++-- + 9 files changed, 118 insertions(+), 21 deletions(-) + +diff --git a/core/config.h b/core/config.h +index 3ee0b2cd..54150477 100644 +--- a/core/config.h ++++ b/core/config.h +@@ -724,4 +724,8 @@ unless used elsewhere */ + #define WASM_ENABLE_EXTENDED_CONST_EXPR 0 + #endif + ++#ifndef WASM_SUPPORT_NUL_IN_STRING ++#define WASM_SUPPORT_NUL_IN_STRING 1 ++#endif ++ + #endif /* end of _CONFIG_H_ */ +diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c +index 771ef87f..95984387 100644 +--- a/core/iwasm/aot/aot_loader.c ++++ b/core/iwasm/aot/aot_loader.c +@@ -194,12 +194,20 @@ GET_U16_FROM_ADDR(const uint8 *p) + p += len; \ + } while (0) + +-#define read_string(p, p_end, str) \ +- do { \ +- if (!(str = load_string((uint8 **)&p, p_end, module, \ +- is_load_from_file_buf, true, error_buf, \ +- error_buf_size))) \ +- goto fail; \ ++#define read_string(p, p_end, str) \ ++ do { \ ++ if (!(str = load_string((uint8 **)&p, p_end, module, \ ++ is_load_from_file_buf, true, NULL, error_buf, \ ++ error_buf_size))) \ ++ goto fail; \ ++ } while (0) ++ ++#define read_string_len(p, p_end, str, len) \ ++ do { \ ++ if (!(str = load_string((uint8 **)&p, p_end, module, \ ++ is_load_from_file_buf, true, len, error_buf, \ ++ error_buf_size))) \ ++ goto fail; \ + } while (0) + + #else /* else of (WASM_ENABLE_WORD_ALIGN_READ != 0) */ +@@ -230,12 +238,20 @@ GET_U16_FROM_ADDR(const uint8 *p) + p += len; \ + } while (0) + +-#define read_string(p, p_end, str) \ +- do { \ +- if (!(str = load_string((uint8 **)&p, p_end, module, \ +- is_load_from_file_buf, error_buf, \ +- error_buf_size))) \ +- goto fail; \ ++#define read_string(p, p_end, str) \ ++ do { \ ++ if (!(str = load_string((uint8 **)&p, p_end, module, \ ++ is_load_from_file_buf, NULL, error_buf, \ ++ error_buf_size))) \ ++ goto fail; \ ++ } while (0) ++ ++#define read_string_len(p, p_end, str, len) \ ++ do { \ ++ if (!(str = load_string((uint8 **)&p, p_end, module, \ ++ is_load_from_file_buf, len, error_buf, \ ++ error_buf_size))) \ ++ goto fail; \ + } while (0) + + #endif /* end of (WASM_ENABLE_WORD_ALIGN_READ != 0) */ +@@ -343,7 +359,7 @@ load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module, + #if (WASM_ENABLE_WORD_ALIGN_READ != 0) + bool is_vram_word_align, + #endif +- char *error_buf, uint32 error_buf_size) ++ uint32 *length, char *error_buf, uint32 error_buf_size) + { + uint8 *p = *p_buf; + const uint8 *p_end = buf_end; +@@ -366,11 +382,13 @@ load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module, + } + #endif + else if (is_load_from_file_buf) { ++#if WASM_SUPPORT_NUL_IN_STRING == 0 + /* The string is always terminated with '\0', use it directly. + * In this case, the file buffer can be referred to after loading. + */ + if (p[str_len - 1] != '\0') + goto fail; ++#endif + + str = (char *)p; + } +@@ -378,8 +396,10 @@ load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module, + /* Load from sections, the file buffer cannot be referred to + after loading, we must create another string and insert it + into const string set */ ++#if WASM_SUPPORT_NUL_IN_STRING == 0 + if (p[str_len - 1] != '\0') + goto fail; ++#endif + + if (!(str = aot_const_str_set_insert((uint8 *)p, str_len, module, + #if (WASM_ENABLE_WORD_ALIGN_READ != 0) +@@ -392,6 +412,8 @@ load_string(uint8 **p_buf, const uint8 *buf_end, AOTModule *module, + p += str_len; + + *p_buf = p; ++ if (length) ++ *length = str_len; + return str; + fail: + return NULL; +@@ -2237,7 +2259,12 @@ load_import_globals(const uint8 **p_buf, const uint8 *buf_end, + read_uint8(buf, buf_end, import_globals[i].type.val_type); + read_uint8(buf, buf_end, import_globals[i].type.is_mutable); + read_string(buf, buf_end, import_globals[i].module_name); ++#if WASM_SUPPORT_NUL_IN_STRING == 0 + read_string(buf, buf_end, import_globals[i].global_name); ++#else ++ read_string_len(buf, buf_end, import_globals[i].global_name, ++ &import_globals[i].global_name_len); ++#endif + + if (!is_valid_value_type(import_globals[i].type.val_type)) { + return false; +diff --git a/core/iwasm/aot/aot_runtime.c b/core/iwasm/aot/aot_runtime.c +index 85d7a3ca..f6c205f8 100644 +--- a/core/iwasm/aot/aot_runtime.c ++++ b/core/iwasm/aot/aot_runtime.c +@@ -1868,8 +1868,15 @@ check_linked_symbol(AOTModule *module, char *error_buf, uint32 error_buf_size) + AOTImportGlobal *global = module->import_globals + i; + if (!global->is_linked) { + set_error_buf_v(error_buf, error_buf_size, ++#if WASM_ENABLE_MULTI_MODULE == 0 + "failed to link import global (%s, %s)", +- global->module_name, global->global_name); ++ global->module_name, global->global_name ++#else ++ "failed to link import global (%s, %.*s)", ++ global->module_name, global->global_name_len, ++ global->global_name ++#endif ++ ); + return false; + } + } +diff --git a/core/iwasm/compilation/aot.c b/core/iwasm/compilation/aot.c +index 5e1e554a..ec93ec51 100644 +--- a/core/iwasm/compilation/aot.c ++++ b/core/iwasm/compilation/aot.c +@@ -236,6 +236,9 @@ aot_create_import_globals(const WASMModule *module, bool gc_enabled, + WASMGlobalImport *import_global = &module->import_globals[i].u.global; + import_globals[i].module_name = import_global->module_name; + import_globals[i].global_name = import_global->field_name; ++#if WASM_SUPPORT_NUL_IN_STRING != 0 ++ import_globals[i].global_name_len = import_global->field_name_len; ++#endif + import_globals[i].type.val_type = import_global->type.val_type; + import_globals[i].type.is_mutable = import_global->type.is_mutable; + import_globals[i].global_data_linked = +diff --git a/core/iwasm/compilation/aot.h b/core/iwasm/compilation/aot.h +index 973d198c..cb953152 100644 +--- a/core/iwasm/compilation/aot.h ++++ b/core/iwasm/compilation/aot.h +@@ -142,6 +142,9 @@ typedef struct AOTTableInitData { + typedef struct AOTImportGlobal { + char *module_name; + char *global_name; ++#if WASM_SUPPORT_NUL_IN_STRING != 0 ++ uint32 global_name_len; ++#endif + WASMGlobalType type; + uint32 size; + /* The data offset of current global in global data */ +diff --git a/core/iwasm/compilation/aot_emit_aot_file.c b/core/iwasm/compilation/aot_emit_aot_file.c +index 12749305..ad1e20fe 100644 +--- a/core/iwasm/compilation/aot_emit_aot_file.c ++++ b/core/iwasm/compilation/aot_emit_aot_file.c +@@ -653,7 +653,11 @@ get_import_global_size(AOTCompContext *comp_ctx, AOTImportGlobal *import_global) + uint32 size = (uint32)sizeof(uint8) * 2 + + get_string_size(comp_ctx, import_global->module_name); + size = align_uint(size, 2); ++#if WASM_SUPPORT_NUL_IN_STRING == 0 + size += get_string_size(comp_ctx, import_global->global_name); ++#else ++ size += (uint32)sizeof(uint16) + import_global->global_name_len; ++#endif + return size; + } + +@@ -2282,7 +2286,16 @@ aot_emit_import_global_info(uint8 *buf, uint8 *buf_end, uint32 *p_offset, + EMIT_U8(import_global->type.is_mutable); + EMIT_STR(import_global->module_name); + offset = align_uint(offset, 2); ++#if WASM_SUPPORT_NUL_IN_STRING == 0 + EMIT_STR(import_global->global_name); ++#else ++ if (import_global->global_name_len > UINT16_MAX) { ++ aot_set_last_error("import global name is too long"); ++ return false; ++ } ++ EMIT_U16((uint16)import_global->global_name_len); ++ EMIT_BUF(import_global->global_name, import_global->global_name_len); ++#endif + } + + if (offset - *p_offset +diff --git a/core/iwasm/interpreter/wasm.h b/core/iwasm/interpreter/wasm.h +index 0dd73958..9ffb9cd6 100644 +--- a/core/iwasm/interpreter/wasm.h ++++ b/core/iwasm/interpreter/wasm.h +@@ -645,6 +645,9 @@ typedef struct WASMGlobalType { + typedef struct WASMGlobalImport { + char *module_name; + char *field_name; ++#if WASM_SUPPORT_NUL_IN_STRING != 0 ++ uint32 field_name_len; ++#endif + WASMGlobalType type; + bool is_linked; + /* global data after linked */ +@@ -833,6 +836,9 @@ typedef struct WASIArguments { + typedef struct StringNode { + struct StringNode *next; + char *str; ++#if WASM_SUPPORT_NUL_IN_STRING != 0 ++ uint32 str_len; ++#endif + } StringNode, *StringList; + + typedef struct BrTableCache { +diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c +index b86c32d9..4b8ad2d9 100644 +--- a/core/iwasm/interpreter/wasm_loader.c ++++ b/core/iwasm/interpreter/wasm_loader.c +@@ -3237,7 +3237,11 @@ fail: + static bool + load_global_import(const uint8 **p_buf, const uint8 *buf_end, + WASMModule *parent_module, char *sub_module_name, +- char *global_name, WASMGlobalImport *global, char *error_buf, ++ char *global_name, ++#if WASM_SUPPORT_NUL_IN_STRING != 0 ++ uint32 global_name_len, ++#endif ++ WASMGlobalImport *global, char *error_buf, + uint32 error_buf_size) + { + const uint8 *p = *p_buf, *p_end = buf_end; +@@ -3326,6 +3330,9 @@ load_global_import(const uint8 **p_buf, const uint8 *buf_end, + + global->module_name = sub_module_name; + global->field_name = global_name; ++#if WASM_SUPPORT_NUL_IN_STRING != 0 ++ global->field_name_len = global_name_len; ++#endif + global->type.val_type = declare_type; + global->type.is_mutable = (declare_mutable == 1); + +@@ -3737,9 +3744,12 @@ load_import_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module, + case IMPORT_KIND_GLOBAL: /* import global */ + bh_assert(import_globals); + import = import_globals++; +- if (!load_global_import(&p, p_end, module, sub_module_name, +- field_name, &import->u.global, +- error_buf, error_buf_size)) { ++ if (!load_global_import( ++ &p, p_end, module, sub_module_name, field_name, ++#if WASM_SUPPORT_NUL_IN_STRING != 0 ++ name_len, ++#endif ++ &import->u.global, error_buf, error_buf_size)) { + return false; + } + break; +diff --git a/core/iwasm/interpreter/wasm_runtime.c b/core/iwasm/interpreter/wasm_runtime.c +index b4aa483d..67e19af7 100644 +--- a/core/iwasm/interpreter/wasm_runtime.c ++++ b/core/iwasm/interpreter/wasm_runtime.c +@@ -1991,8 +1991,15 @@ check_linked_symbol(WASMModuleInstance *module_inst, char *error_buf, + return false; + #else + set_error_buf_v(error_buf, error_buf_size, ++#if WASM_SUPPORT_NUL_IN_STRING == 0 + "failed to link import global (%s, %s)", +- global->module_name, global->field_name); ++ global->module_name, global->field_name ++#else ++ "failed to link import global (%s, %.*s)", ++ global->module_name, global->field_name_len, ++ global->field_name ++#endif ++ ); + return false; + #endif /* WASM_ENABLE_SPEC_TEST != 0 */ + } +@@ -4187,8 +4194,12 @@ wasm_get_module_mem_consumption(const WASMModule *module, + StringNode *node = module->const_str_list, *node_next; + while (node) { + node_next = node->next; ++#if WASM_SUPPORT_NUL_IN_STRING == 0 + mem_conspn->const_strs_size += + sizeof(StringNode) + strlen(node->str) + 1; ++#else ++ mem_conspn->const_strs_size += sizeof(StringNode) + node->str_len; ++#endif + node = node_next; + } + } +@@ -5004,12 +5015,14 @@ wasm_check_utf8_str(const uint8 *str, uint32 len) + while (p < p_end) { + chr = *p; + ++#if WASM_SUPPORT_NUL_IN_STRING == 0 + if (chr == 0) { + LOG_WARNING( + "LIMITATION: a string which contains '\\00' is unsupported"); + return false; + } +- else if (chr < 0x80) { ++#endif ++ if (chr < 0x80) { + p++; + } + else if (chr >= 0xC2 && chr <= 0xDF && p + 1 < p_end) { +@@ -5080,6 +5093,7 @@ wasm_const_str_list_insert(const uint8 *str, uint32 len, WASMModule *module, + return ""; + } + else if (is_load_from_file_buf) { ++ // TODO: WASM_SUPPORT_NUL_IN_STRING: drop + /* As the file buffer can be referred to after loading, we use + the previous byte of leb encoded size to adjust the string: + move string 1 byte backward and then append '\0' */ +@@ -5092,8 +5106,14 @@ wasm_const_str_list_insert(const uint8 *str, uint32 len, WASMModule *module, + /* Search const str list */ + node = module->const_str_list; + while (node) { ++ uint32 str_len; + node_next = node->next; +- if (strlen(node->str) == len && !memcmp(node->str, str, len)) ++#if WASM_SUPPORT_NUL_IN_STRING == 0 ++ str_len = strlen(node->str); ++#else ++ str_len = node->str_len; ++#endif ++ if (str_len == len && !memcmp(node->str, str, len)) + break; + node = node_next; + } +@@ -5102,6 +5122,7 @@ wasm_const_str_list_insert(const uint8 *str, uint32 len, WASMModule *module, + return node->str; + } + ++ // TODO: WASM_SUPPORT_NUL_IN_STRING: drop len + 1 + if (!(node = runtime_malloc(sizeof(StringNode) + len + 1, error_buf, + error_buf_size))) { + return NULL; +@@ -5110,6 +5131,9 @@ wasm_const_str_list_insert(const uint8 *str, uint32 len, WASMModule *module, + node->str = ((char *)node) + sizeof(StringNode); + bh_memcpy_s(node->str, len + 1, str, len); + node->str[len] = '\0'; ++#if WASM_SUPPORT_NUL_IN_STRING != 0 ++ node->str_len = len; ++#endif + + if (!module->const_str_list) { + /* set as head */ +-- +2.51.0 + diff --git a/subprojects/packagefiles/wamr/0005-Support-custom-global-resolver.patch b/subprojects/packagefiles/wamr/0005-Support-custom-global-resolver.patch index 2f19021..edeb269 100644 --- a/subprojects/packagefiles/wamr/0005-Support-custom-global-resolver.patch +++ b/subprojects/packagefiles/wamr/0005-Support-custom-global-resolver.patch @@ -1,29 +1,31 @@ -From e0813f59bed707e30efd0f3f49954d6ec0f5b31d Mon Sep 17 00:00:00 2001 +From 54cb509104b1add72ee02eac534569c6d64e1d25 Mon Sep 17 00:00:00 2001 From: Xenia Lu Date: Wed, 22 Jan 2025 18:46:35 +0800 Subject: [PATCH 5/5] Support custom global resolver --- - core/iwasm/aot/aot_loader.c | 16 +++++++ - core/iwasm/common/wasm_native.c | 70 ++++++++++++++++++++++++++++ - core/iwasm/common/wasm_native.h | 21 +++++++++ - core/iwasm/interpreter/wasm.h | 7 +++ - core/iwasm/interpreter/wasm_loader.c | 20 +++++++- - 5 files changed, 133 insertions(+), 1 deletion(-) + core/iwasm/aot/aot_loader.c | 18 +++++++ + core/iwasm/common/wasm_native.c | 73 ++++++++++++++++++++++++++++ + core/iwasm/common/wasm_native.h | 24 +++++++++ + core/iwasm/interpreter/wasm.h | 9 ++++ + core/iwasm/interpreter/wasm_loader.c | 23 ++++++++- + 5 files changed, 146 insertions(+), 1 deletion(-) diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c -index 0949d2b8..6e7b6a0e 100644 +index 95984387..a8689580 100644 --- a/core/iwasm/aot/aot_loader.c +++ b/core/iwasm/aot/aot_loader.c -@@ -2161,6 +2161,22 @@ load_import_globals(const uint8 **p_buf, const uint8 *buf_end, +@@ -2289,6 +2289,24 @@ load_import_globals(const uint8 **p_buf, const uint8 *buf_end, import_globals[i].is_linked = false; #endif + /* Call custom globals resolver */ + if (!import_globals[i].is_linked) { + WASMValue *linked_data = wasm_resolve_global( -+ import_globals[i].module_name, -+ import_globals[i].global_name, ++ import_globals[i].module_name, import_globals[i].global_name, ++#if WASM_SUPPORT_NUL_IN_STRING != 0 ++ import_globals[i].global_name_len, ++#endif +#if WASM_ENABLE_GC != 0 + NULL, +#endif @@ -39,7 +41,7 @@ index 0949d2b8..6e7b6a0e 100644 wasm_value_type_size(import_globals[i].type.val_type); import_globals[i].data_offset = data_offset; diff --git a/core/iwasm/common/wasm_native.c b/core/iwasm/common/wasm_native.c -index 9e8764a2..e96c26ca 100644 +index 060bb2c3..d374c977 100644 --- a/core/iwasm/common/wasm_native.c +++ b/core/iwasm/common/wasm_native.c @@ -20,6 +20,7 @@ @@ -50,12 +52,15 @@ index 9e8764a2..e96c26ca 100644 #if WASM_ENABLE_LIBC_WASI != 0 static void *g_wasi_context_key; -@@ -319,6 +320,74 @@ wasm_native_unregister_natives(const char *module_name, +@@ -324,6 +325,78 @@ wasm_native_unregister_natives(const char *module_name, return false; } +WASMValue * +wasm_resolve_global(const char *module_name, const char *global_name, ++#if WASM_SUPPORT_NUL_IN_STRING != 0 ++ uint32 global_name_len, ++#endif +#if WASM_ENABLE_GC != 0 + WASMRefType *ref_type, +#endif @@ -68,13 +73,14 @@ index 9e8764a2..e96c26ca 100644 + while (node) { + node_next = node->next; + if (!strcmp(node->module_name, module_name)) { -+ if ((rv = node->resolver( -+ global_name, ++ if ((rv = node->resolver(global_name, ++#if WASM_SUPPORT_NUL_IN_STRING != 0 ++ global_name_len, ++#endif +#if WASM_ENABLE_GC != 0 -+ ref_type, ++ ref_type, +#endif -+ val_type, -+ is_mutable))) ++ val_type, is_mutable))) + break; + } + node = node_next; @@ -125,13 +131,8 @@ index 9e8764a2..e96c26ca 100644 #if WASM_ENABLE_MODULE_INST_CONTEXT != 0 static uint32 context_key_to_idx(void *key) -@@ -1517,3 +1586,4 @@ wasm_native_lookup_quick_aot_entry(const WASMFuncType *func_type) - return NULL; - } - #endif /* end of WASM_ENABLE_QUICK_AOT_ENTRY != 0 */ -+ diff --git a/core/iwasm/common/wasm_native.h b/core/iwasm/common/wasm_native.h -index 5cb78bf9..0c40d01c 100644 +index 9a6afee1..3af48b50 100644 --- a/core/iwasm/common/wasm_native.h +++ b/core/iwasm/common/wasm_native.h @@ -22,6 +22,12 @@ typedef struct NativeSymbolsNode { @@ -147,12 +148,15 @@ index 5cb78bf9..0c40d01c 100644 /** * Lookup global variable of a given import global * from libc builtin globals -@@ -69,6 +75,21 @@ bool +@@ -69,6 +75,24 @@ bool wasm_native_unregister_natives(const char *module_name, NativeSymbol *native_symbols); +WASMValue * +wasm_resolve_global(const char *module_name, const char *global_name, ++#if WASM_SUPPORT_NUL_IN_STRING != 0 ++ uint32 global_name_len, ++#endif +#if WASM_ENABLE_GC != 0 + WASMRefType *ref_type, +#endif @@ -170,41 +174,45 @@ index 5cb78bf9..0c40d01c 100644 struct WASMModuleInstanceCommon; diff --git a/core/iwasm/interpreter/wasm.h b/core/iwasm/interpreter/wasm.h -index 03519c18..20578207 100644 +index 9ffb9cd6..ef88e00b 100644 --- a/core/iwasm/interpreter/wasm.h +++ b/core/iwasm/interpreter/wasm.h -@@ -1114,6 +1114,13 @@ typedef struct WASMBranchBlock { +@@ -1172,6 +1172,15 @@ typedef struct WASMBranchBlock { #endif } WASMBranchBlock; +typedef WASMValue *(*wasm_global_resolver_t)(const char *name, ++#if WASM_SUPPORT_NUL_IN_STRING != 0 ++ uint32 name_len, ++#endif +#if WASM_ENABLE_GC != 0 + WASMRefType *ref_type, +#endif -+ uint8 val_type, -+ uint8 is_mutable); ++ uint8 val_type, uint8 is_mutable); + /** * Align an unsigned value on a alignment boundary. * diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c -index f62103a4..94504d6b 100644 +index 4b8ad2d9..cbb7310a 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c -@@ -3019,6 +3019,24 @@ load_global_import(const uint8 **p_buf, const uint8 *buf_end, +@@ -3308,6 +3308,26 @@ load_global_import(const uint8 **p_buf, const uint8 *buf_end, global->is_linked = true; } #endif + + /* Call custom globals resolver */ + if (!global->is_linked) { -+ WASMValue *linked_data = wasm_resolve_global(sub_module_name, -+ global_name, ++ WASMValue *linked_data = ++ wasm_resolve_global(sub_module_name, global_name, ++#if WASM_SUPPORT_NUL_IN_STRING != 0 ++ global_name_len, ++#endif +#if WASM_ENABLE_GC != 0 -+ &ref_type, ++ &ref_type, +#endif -+ declare_type, -+ declare_mutable); ++ declare_type, declare_mutable); + if (linked_data) { + global->type.val_type = declare_type; + global->type.is_mutable = declare_mutable; @@ -216,15 +224,16 @@ index f62103a4..94504d6b 100644 #if WASM_ENABLE_MULTI_MODULE != 0 if (!global->is_linked && !wasm_runtime_is_built_in_module(sub_module_name)) { -@@ -4037,7 +4055,7 @@ load_global_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module, +@@ -4361,7 +4381,8 @@ load_global_section(const uint8 *buf, const uint8 *buf_end, WASMModule *module, ->globals[global_idx - module->import_global_count] .ref_type; } - if (!wasm_reftype_is_subtype_of( -+ if (global->ref_type && !wasm_reftype_is_subtype_of( ++ if (global->ref_type ++ && !wasm_reftype_is_subtype_of( global_type, global_ref_type, global->type.val_type, global->ref_type, module->types, module->type_count)) { set_error_buf(error_buf, error_buf_size, "type mismatch"); -- -2.47.1 +2.51.0 diff --git a/subprojects/packagefiles/wamr/0006-static-externref.patch b/subprojects/packagefiles/wamr/0006-static-externref.patch new file mode 100644 index 0000000..1a93533 --- /dev/null +++ b/subprojects/packagefiles/wamr/0006-static-externref.patch @@ -0,0 +1,55 @@ +diff --git a/core/iwasm/common/gc/gc_object.h b/core/iwasm/common/gc/gc_object.h +index 75fdbef5..5622bfd8 100644 +--- a/core/iwasm/common/gc/gc_object.h ++++ b/core/iwasm/common/gc/gc_object.h +@@ -33,6 +33,8 @@ typedef uintptr_t WASMObjectHeader; + + #define WASM_OBJ_EXTRA_INFO_FLAG (((uintptr_t)1) << 2) + ++#define WASM_OBJ_STATIC_OBJ_FLAG (((uintptr_t)1) << 3) ++ + /* Representation of WASM objects */ + typedef struct WASMObject { + WASMObjectHeader header; +@@ -315,6 +317,10 @@ wasm_obj_is_created_from_heap(WASMObjectRef obj) + if (obj == NULL || (((uintptr_t)obj) & 1)) + /* null object or i31 object */ + return false; ++ if ((obj->header & (WASM_OBJ_EXTERNREF_OBJ_FLAG | WASM_OBJ_STATIC_OBJ_FLAG)) ++ == (WASM_OBJ_EXTERNREF_OBJ_FLAG | WASM_OBJ_STATIC_OBJ_FLAG)) ++ /* static externref object */ ++ return false; + return true; + } + +diff --git a/core/shared/mem-alloc/ems/ems_gc.c b/core/shared/mem-alloc/ems/ems_gc.c +index 26e83a97..07befa8c 100644 +--- a/core/shared/mem-alloc/ems/ems_gc.c ++++ b/core/shared/mem-alloc/ems/ems_gc.c +@@ -6,6 +6,8 @@ + #include "ems_gc.h" + #include "ems_gc_internal.h" + ++#include ++ + #define GB (1 << 30UL) + + #define MARK_NODE_OBJ_CNT 256 +@@ -379,7 +381,7 @@ reclaim_instance_heap(gc_heap_t *heap) + offset = ref_start_offset + j * sizeof(void *); + bh_assert(offset + sizeof(void *) < size); + ref = *(gc_object_t *)(((gc_uint8 *)obj) + offset); +- if (ref == NULL_REF || ((uintptr_t)ref & 1)) ++ if (!wasm_obj_is_created_from_heap(ref)) + continue; /* null object or i31 object */ + if (add_wo_to_expand(heap, ref) == GC_ERROR) { + LOG_ERROR("add_wo_to_expand failed"); +@@ -395,7 +397,7 @@ reclaim_instance_heap(gc_heap_t *heap) + bh_assert(offset + sizeof(void *) < size); + + ref = *(gc_object_t *)(((gc_uint8 *)obj) + offset); +- if (ref == NULL_REF || ((uintptr_t)ref & 1)) ++ if (!wasm_obj_is_created_from_heap(ref)) + continue; /* null object or i31 object */ + if (add_wo_to_expand(heap, ref) == GC_ERROR) { + LOG_ERROR("mark process failed"); diff --git a/subprojects/wamr.wrap b/subprojects/wamr.wrap index d2beb74..82718d2 100644 --- a/subprojects/wamr.wrap +++ b/subprojects/wamr.wrap @@ -5,7 +5,9 @@ source_filename = wamr-2.4.2.tar.gz source_hash = 73380561a01f4863506e855c2c265cf03c5b6efb17bbb8c9bbafe80745fd00ef method = cmake diff_files = - wamr/0001.patch, - wamr/0002.patch, - wamr/0004.patch, - wamr/0005-Support-custom-global-resolver.patch + wamr/0001-fix-false-OOB-in-array.fill-for-interp.patch, + wamr/0002-loader-fix-block-loop-ref-params-type-checking.patch, + wamr/0003-fix-typo-in-AOT-stack-dump-with-GC.patch, + wamr/0004-Add-basic-SUPPORT_NUL_IN_STRING.patch, + wamr/0005-Support-custom-global-resolver.patch, + wamr/0006-static-externref.patch From 63f775b9ec8b9caae25f9794dc72a44751c6231b Mon Sep 17 00:00:00 2001 From: Xenia Lu Date: Fri, 17 Oct 2025 19:06:47 +0800 Subject: [PATCH 3/5] refactor: drop MoonBit unstable APIs --- Makefile | 15 +- meson.build | 5 +- src/rustica/adt/clock.c | 40 +++++ src/rustica/bh_log_to_pg.c | 74 +++++++++ src/rustica/datatypes.h | 5 + src/rustica/env.c | 312 ------------------------------------- src/rustica/env.h | 58 ------- src/rustica/main.c | 29 +++- src/rustica/moontest.c | 1 - 9 files changed, 151 insertions(+), 388 deletions(-) create mode 100644 src/rustica/adt/clock.c create mode 100644 src/rustica/bh_log_to_pg.c delete mode 100644 src/rustica/env.c delete mode 100644 src/rustica/env.h diff --git a/Makefile b/Makefile index 121558b..e0c139f 100644 --- a/Makefile +++ b/Makefile @@ -3,9 +3,12 @@ DIST_DIR = $(shell pwd)/dist DEV_DATA_DIR = $(shell pwd)/data DEV_LLVM_DIR = /usr/lib/llvm18 -# Default target: build for development -.PHONY: build -build: +$(BUILD_DIR)/install/.stamp: $(BUILD_DIR)/build/.stamp meson.build meson.py $(shell find src -name '*.c' -o -name '*.h') + uv run meson.py install -C $(BUILD_DIR)/build --destdir=$(BUILD_DIR)/install + touch $@ + +.PHONY: rebuild +rebuild: touch $(BUILD_DIR)/build/.stamp $(MAKE) $(BUILD_DIR)/install/.stamp @@ -13,10 +16,6 @@ $(BUILD_DIR)/build/.stamp: uv run meson.py setup $(BUILD_DIR)/build --prefix=/ -Dllvm_dir=$(DEV_LLVM_DIR) touch $@ -$(BUILD_DIR)/install/.stamp: $(BUILD_DIR)/build/.stamp - uv run meson.py install -C $(BUILD_DIR)/build --destdir=$(BUILD_DIR)/install - touch $@ - # Run a development PostgreSQL instance .PHONY: run run: $(DEV_DATA_DIR)/.stamp @@ -103,4 +102,4 @@ clean: # Delete all build artifacts, development files, and distribution files .PHONY: distclean distclean: clean - rm -rf $(BUILD_DIR) $(DIST_DIR) $(DEV_DATA_DIR) + rm -rf $(BUILD_DIR) $(DIST_DIR) $(DEV_DATA_DIR) subprojects/wasm-micro-runtime-WAMR-* diff --git a/meson.build b/meson.build index e4db339..cd5ebf9 100644 --- a/meson.build +++ b/meson.build @@ -273,10 +273,11 @@ uncommon_shared_lib = shared_library('uncommon_shared', # Build the Rustica Engine rustica_shared = files( - 'src/rustica/env.c', + 'src/rustica/bh_log_to_pg.c', 'src/rustica/datatypes.c', - 'src/rustica/adt/text.c', + 'src/rustica/adt/clock.c', 'src/rustica/adt/stringbuilder.c', + 'src/rustica/adt/text.c', ) rustica_deps = [vmlib, pg.get_variable('uuid')] rustica_cargs = wamr.get_cmake_definitions('-DWASM') diff --git a/src/rustica/adt/clock.c b/src/rustica/adt/clock.c new file mode 100644 index 0000000..70070fd --- /dev/null +++ b/src/rustica/adt/clock.c @@ -0,0 +1,40 @@ +// SPDX-FileCopyrightText: 2025 燕几(北京)科技有限公司 +// SPDX-License-Identifier: Apache-2.0 OR MulanPSL-2.0 + +#include "postgres.h" + +#include "rustica/datatypes.h" + +static uint64_t +realtime_micros_since_unix_epoch(wasm_exec_env_t exec_env) { + struct timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + return (uint64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000; +} + +static wasm_externref_obj_t +monotonic_now(wasm_exec_env_t exec_env) { + obj_t mono = rst_obj_new(exec_env, OBJ_CLOCK_MONOTONIC, NULL, 0); + INSTR_TIME_SET_CURRENT(mono->body.instr_time); + return rst_externref_of_obj(exec_env, mono); +} + +static uint64_t +monotonic_nanos_since(wasm_exec_env_t exec_env, wasm_obj_t ref) { + obj_t mono = wasm_externref_obj_get_obj(ref, OBJ_CLOCK_MONOTONIC); + instr_time it; + INSTR_TIME_SET_CURRENT(it); + INSTR_TIME_SUBTRACT(it, mono->body.instr_time); + return INSTR_TIME_GET_NANOSEC(it); +} + +static NativeSymbol clock_natives[] = { + { "realtime_micros_since_unix_epoch", realtime_micros_since_unix_epoch, "()I" }, + { "monotonic_now", monotonic_now, "()r" }, + { "monotonic_nanos_since", monotonic_nanos_since, "(r)I" }, +}; + +void +rst_register_natives_clock() { + REGISTER_WASM_NATIVES("env", clock_natives); +} diff --git a/src/rustica/bh_log_to_pg.c b/src/rustica/bh_log_to_pg.c new file mode 100644 index 0000000..a1548cb --- /dev/null +++ b/src/rustica/bh_log_to_pg.c @@ -0,0 +1,74 @@ +// SPDX-FileCopyrightText: 2025 燕几(北京)科技有限公司 +// SPDX-License-Identifier: Apache-2.0 OR MulanPSL-2.0 + +#include "postgres.h" + +#include "bh_log.h" + +int +pg_log_vprintf(const char *format, va_list ap) { + int rv = 0; + ereport_domain(LOG, "WAMR", ({ + StringInfoData buf; + initStringInfo(&buf); + for (;;) { + const int needed = + appendStringInfoVA(&buf, format, ap); + if (needed == 0) + break; + enlargeStringInfo(&buf, needed); + } + while (buf.len > 0 && buf.data[buf.len - 1] == '\n') { + buf.data[buf.len - 1] = '\0'; + buf.len -= 1; + } + rv = buf.len; + errmsg_internal(buf.data); + pfree(buf.data); + })); + return rv; +} + +void +pg_bh_log(LogLevel log_level, const char *file, int line, const char *fmt, ...) { + int elevel = LOG; + switch (log_level) { + case BH_LOG_LEVEL_FATAL: + elevel = FATAL; + break; + case BH_LOG_LEVEL_ERROR: + elevel = ERROR; + break; + case BH_LOG_LEVEL_WARNING: + elevel = WARNING; + break; + case BH_LOG_LEVEL_DEBUG: + elevel = DEBUG1; + break; + case BH_LOG_LEVEL_VERBOSE: + elevel = DEBUG3; + break; + } + do { + pg_prevent_errno_in_scope(); + if (errstart(elevel, "WAMR")) { + StringInfoData buf; + initStringInfo(&buf); + for (;;) { + va_list ap; + int needed; + va_start(ap, fmt); + needed = appendStringInfoVA(&buf, fmt, ap); + va_end(ap); + if (needed == 0) + break; + enlargeStringInfo(&buf, needed); + } + errmsg_internal(buf.data); + pfree(buf.data); + errfinish(file, line, "-"); + } + if (elevel >= ERROR) + pg_unreachable(); + } while(0); +} diff --git a/src/rustica/datatypes.h b/src/rustica/datatypes.h index 1961f9a..f5cacea 100644 --- a/src/rustica/datatypes.h +++ b/src/rustica/datatypes.h @@ -30,6 +30,7 @@ #define OBJ_PORTAL 3 #define OBJ_TUPLE_TABLE 4 #define OBJ_HEAP_TUPLE 5 +#define OBJ_CLOCK_MONOTONIC 6 #define OBJ_REFERENCING (1 << 0) #define OBJ_OWNS_BODY (1 << 1) @@ -56,6 +57,7 @@ typedef struct Obj { Portal portal; // only for OBJ_PORTAL SPITupleTable *tuptable; // only for OBJ_TUPLE_TABLE HeapTuple tuple; // only for OBJ_HEAP_TUPLE + instr_time instr_time; // only for OBJ_CLOCK_MONOTONIC void *ptr; // convenient compatible pointer for all types } body; @@ -124,6 +126,9 @@ rst_register_natives_timestamp(); void rst_register_natives_uuid(); +void +rst_register_natives_clock(); + void rst_init_context_for_jsonb(wasm_exec_env_t exec_env); diff --git a/src/rustica/env.c b/src/rustica/env.c deleted file mode 100644 index 332b947..0000000 --- a/src/rustica/env.c +++ /dev/null @@ -1,312 +0,0 @@ -// SPDX-FileCopyrightText: 2025 燕几(北京)科技有限公司 -// SPDX-License-Identifier: Apache-2.0 OR MulanPSL-2.0 - -#include -#include -#include - -#include "postgres.h" -#include "mb/pg_wchar.h" -#include "portability/instr_time.h" - -#include "bh_log.h" - -#include "rustica/env.h" - -char **saved_argv; -int saved_argc; - -static void -obj_finalizer(const wasm_obj_t obj, void *data) { - rustica_value_t val = - (rustica_value_t)wasm_anyref_obj_get_value((wasm_anyref_obj_t)obj); - pfree(val); -} - -rustica_value_t -rustica_value_new(const uint8_t type, void *ptr, const size_t size) { - const rustica_value_t rv = palloc0(sizeof(RusticaValue) + size); - rv->type = type; - if (size == 0) { - rv->ptr = ptr; - } - else { - rv->data = rv->vardata; - if (ptr != NULL) - memcpy(rv->data, ptr, size); - } - return rv; -} - -wasm_externref_obj_t -rustica_value_to_wasm(wasm_exec_env_t exec_env, rustica_value_t val) { - wasm_externref_obj_t rv = wasm_externref_obj_new(exec_env, val); - wasm_obj_set_gc_finalizer(exec_env, - wasm_externref_obj_to_internal_obj(rv), - obj_finalizer, - exec_env); - return rv; -} - -rustica_value_t -rustica_value_from_wasm(wasm_obj_t refobj, uint8_t expected_type) { - wasm_obj_t anyref = - wasm_externref_obj_to_internal_obj((wasm_externref_obj_t)refobj); - rustica_value_t rv = - (rustica_value_t)wasm_anyref_obj_get_value((wasm_anyref_obj_t)anyref); - return rv->type == expected_type ? rv : NULL; -} - -static void -print_char(wasm_exec_env_t exec_env, int ch) { - char buf[5]; - const int len = pg_wchar2mb_with_len((pg_wchar *)&ch, buf, 1); - Assert(len < 5); - buf[len] = 0; - printf("%s", buf); -} - -static NativeSymbol spectest_symbols[] = { - { "print_char", print_char, "(i)", NULL }, -}; - -static void -exception_throw(wasm_exec_env_t exec_env) { - wasm_module_inst_t inst = wasm_runtime_get_module_inst(exec_env); - wasm_runtime_set_exception(inst, "panic"); -} - -static NativeSymbol exception_symbols[] = { - { "throw", exception_throw, "()", NULL }, -}; - -static StringReader * -fs_begin_read_string(wasm_exec_env_t exec_env, wasm_obj_t ref) { - rustica_value_t val = rustica_value_from_wasm(ref, RUSTICA_ENV_CSTRING); - StringReader *buf = palloc(sizeof(StringReader)); - buf->offset = 0; - buf->size = strlen(val->data); - buf->data = val->data; - return buf; -} - -static int32_t -fs_string_read_char(wasm_exec_env_t exec_env, StringReader *buf) { - int ch; - pg_wchar char_code[2] = { '?', 0 }; - if (buf->offset >= buf->size) { - return -1; // EOF - } - // Read the next Unicode char code using Postgres C functions - ch = pg_utf_mblen((unsigned char *)buf->data + buf->offset); - if (ch > 0) { - pg_encoding_mb2wchar_with_len(PG_UTF8, - buf->data + buf->offset, - char_code, - ch); - buf->offset += ch; - } - else { - // Invalid UTF-8 sequence, skip one byte - buf->offset += 1; - } - return (int32_t)char_code[0]; -} - -static void -fs_finish_read_string(wasm_exec_env_t exec_env, StringReader *buf) { - pfree(buf); -} - -static ArrayReader * -fs_begin_read_string_array(wasm_exec_env_t exec_env, wasm_obj_t ref) { - rustica_value_t val = - rustica_value_from_wasm(ref, RUSTICA_ENV_CSTRING_ARRAY); - ArrayReader *reader = palloc(sizeof(ArrayReader)); - reader->offset = 0; - reader->arr = val->arr; - return reader; -} - -static wasm_externref_obj_t -fs_string_array_read_string(wasm_exec_env_t exec_env, ArrayReader *reader) { - rustica_value_t rv; - if (reader->offset >= reader->arr->size) { - rv = rustica_value_new(RUSTICA_ENV_CSTRING, - "ffi_end_of_/string_array", - 0); - } - else { - rv = rustica_value_new(RUSTICA_ENV_CSTRING, - reader->arr->items[reader->offset], - 0); - reader->offset += 1; - } - return rustica_value_to_wasm(exec_env, rv); -} - -static void -fs_finish_read_string_array(wasm_exec_env_t exec_env, ArrayReader *reader) { - pfree(reader); -} - -static wasm_externref_obj_t -fs_current_dir(wasm_exec_env_t exec_env) { - char buf[1024]; - rustica_value_t rv; - - getcwd(buf, sizeof(buf)); - rv = rustica_value_new(RUSTICA_ENV_CSTRING, buf, strlen(buf) + 1); - return rustica_value_to_wasm(exec_env, rv); -} - -static wasm_externref_obj_t -fs_args_get(wasm_exec_env_t exec_env) { - // return sys argv as rustica_value_t - rustica_value_t rv = rustica_value_new(RUSTICA_ENV_CSTRING_ARRAY, - NULL, - sizeof(RusticaPointerArray) - + sizeof(char *) * saved_argc); - rv->arr->size = saved_argc; - for (int i = 0; i < saved_argc; i++) { - rv->arr->items[i] = saved_argv[i]; - } - return rustica_value_to_wasm(exec_env, rv); -} - -static NativeSymbol fs_symbols[] = { - { "begin_read_string", fs_begin_read_string, "(r)r", NULL }, - { "string_read_char", fs_string_read_char, "(r)i", NULL }, - { "finish_read_string", fs_finish_read_string, "(r)", NULL }, - { "begin_read_string_array", fs_begin_read_string_array, "(r)r", NULL }, - { "string_array_read_string", fs_string_array_read_string, "(r)r", NULL }, - { "finish_read_string_array", fs_finish_read_string_array, "(r)", NULL }, - { "current_dir", fs_current_dir, "()r", NULL }, - { "args_get", fs_args_get, "()r", NULL }, -}; - -static uint64_t -time_now(wasm_exec_env_t exec_env) { - // return current time in milliseconds since epoch - struct timespec ts; - clock_gettime(CLOCK_REALTIME, &ts); - return (uint64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000; -} - -static wasm_externref_obj_t -time_instant_now(wasm_exec_env_t exec_env) { - rustica_value_t rv = - rustica_value_new(RUSTICA_ENV_INSTR_TIME, NULL, sizeof(instr_time)); - INSTR_TIME_SET_CURRENT(*rv->instr_time); - return rustica_value_to_wasm(exec_env, rv); -} - -static double -time_instant_elapsed_as_secs_f64(wasm_exec_env_t exec_env, wasm_obj_t ref) { - rustica_value_t val = rustica_value_from_wasm(ref, RUSTICA_ENV_INSTR_TIME); - instr_time it; - INSTR_TIME_SET_CURRENT(it); - INSTR_TIME_SUBTRACT(it, *val->instr_time); - return INSTR_TIME_GET_DOUBLE(it); -} - -static NativeSymbol time_symbols[] = { - { "now", time_now, "()I", NULL }, - { "instant_now", time_instant_now, "()r", NULL }, - { "instant_elapsed_as_secs_f64", - time_instant_elapsed_as_secs_f64, - "(r)F", - NULL }, -}; - -void -rustica_register_natives() { - if (!wasm_runtime_register_natives("spectest", - spectest_symbols, - sizeof(spectest_symbols) - / sizeof(NativeSymbol))) - ereport(ERROR, errmsg("Failed to register spectest natives")); - if (!wasm_runtime_register_natives("exception", - exception_symbols, - sizeof(exception_symbols) - / sizeof(NativeSymbol))) - ereport(ERROR, errmsg("Failed to register exception natives")); - if (!wasm_runtime_register_natives("__moonbit_fs_unstable", - fs_symbols, - sizeof(fs_symbols) - / sizeof(NativeSymbol))) - ereport(ERROR, errmsg("Failed to register fs natives")); - if (!wasm_runtime_register_natives("__moonbit_time_unstable", - time_symbols, - sizeof(time_symbols) - / sizeof(NativeSymbol))) - ereport(ERROR, errmsg("Failed to register time natives")); -} - -int -pg_log_vprintf(const char *format, va_list ap) { - int rv = 0; - ereport_domain(LOG, "WAMR", ({ - StringInfoData buf; - initStringInfo(&buf); - for (;;) { - const int needed = - appendStringInfoVA(&buf, format, ap); - if (needed == 0) - break; - enlargeStringInfo(&buf, needed); - } - while (buf.len > 0 && buf.data[buf.len - 1] == '\n') { - buf.data[buf.len - 1] = '\0'; - buf.len -= 1; - } - rv = buf.len; - errmsg_internal(buf.data); - pfree(buf.data); - })); - return rv; -} - -void -pg_bh_log(LogLevel log_level, const char *file, int line, const char *fmt, ...) { - int elevel = LOG; - switch (log_level) { - case BH_LOG_LEVEL_FATAL: - elevel = FATAL; - break; - case BH_LOG_LEVEL_ERROR: - elevel = ERROR; - break; - case BH_LOG_LEVEL_WARNING: - elevel = WARNING; - break; - case BH_LOG_LEVEL_DEBUG: - elevel = DEBUG1; - break; - case BH_LOG_LEVEL_VERBOSE: - elevel = DEBUG3; - break; - } - do { - pg_prevent_errno_in_scope(); - if (errstart(elevel, "WAMR")) { - StringInfoData buf; - initStringInfo(&buf); - for (;;) { - va_list ap; - int needed; - va_start(ap, fmt); - needed = appendStringInfoVA(&buf, fmt, ap); - va_end(ap); - if (needed == 0) - break; - enlargeStringInfo(&buf, needed); - } - errmsg_internal(buf.data); - pfree(buf.data); - errfinish(file, line, "-"); - } - if (elevel >= ERROR) - pg_unreachable(); - } while(0); -} diff --git a/src/rustica/env.h b/src/rustica/env.h deleted file mode 100644 index 40770f5..0000000 --- a/src/rustica/env.h +++ /dev/null @@ -1,58 +0,0 @@ -// SPDX-FileCopyrightText: 2025 燕几(北京)科技有限公司 -// SPDX-License-Identifier: Apache-2.0 OR MulanPSL-2.0 - -#ifndef RUSTICA_ENV_H -#define RUSTICA_ENV_H - -#include "portability/instr_time.h" -#include "gc_export.h" - -#define RUSTICA_ENV_CSTRING 0x01 -#define RUSTICA_ENV_CSTRING_ARRAY 0x02 -#define RUSTICA_ENV_INSTR_TIME 0x03 - -extern char **saved_argv; -extern int saved_argc; - -typedef struct RusticaPointerArray { - size_t size; - void *items[]; -} RusticaPointerArray; - -typedef struct RusticaValue { - uint8_t type; - union { - void *ptr; - char *data; - RusticaPointerArray *arr; - instr_time *instr_time; - }; - char vardata[]; -} RusticaValue; - -typedef RusticaValue *rustica_value_t; - -typedef struct StringReader { - uint32_t offset; - uint32_t size; - char *data; -} StringReader; - -typedef struct ArrayReader { - size_t offset; - RusticaPointerArray *arr; -} ArrayReader; - -rustica_value_t -rustica_value_new(uint8_t type, void *ptr, size_t size); - -wasm_externref_obj_t -rustica_value_to_wasm(wasm_exec_env_t exec_env, rustica_value_t val); - -rustica_value_t -rustica_value_from_wasm(wasm_obj_t refobj, uint8_t expected_type); - -void -rustica_register_natives(); - -#endif diff --git a/src/rustica/main.c b/src/rustica/main.c index 497dadd..8949e18 100644 --- a/src/rustica/main.c +++ b/src/rustica/main.c @@ -14,7 +14,6 @@ #include "aot_export.h" #include "rustica/datatypes.h" -#include "rustica/env.h" #include "rustica/moontest.h" #define HEAP_M 512 @@ -31,6 +30,7 @@ run_moontest(wasm_exec_env_t exec_env, va_list args); static void run_wasm_with(const char *wasm_file, bool use_aot, + bool is_moontest, void (*fn)(wasm_exec_env_t, va_list), ...); @@ -125,8 +125,6 @@ main(int argc, char *argv[]) { bool use_aot = true; int rv = 0; - saved_argc = argc; - saved_argv = argv; progname = get_progname(argv[0]); MemoryContextInit(); @@ -156,7 +154,7 @@ main(int argc, char *argv[]) { case RUN: if (optind >= argc) ereport(ERROR, errmsg("No wasm file specified for 'run'")); - run_wasm_with(argv[optind], use_aot, run_start); + run_wasm_with(argv[optind], use_aot, false, run_start); break; case MOONTEST: @@ -169,6 +167,7 @@ main(int argc, char *argv[]) { if (wasm_file) run_wasm_with(wasm_file, use_aot, + true, run_moontest, moontest_spec); } @@ -202,8 +201,18 @@ main(int argc, char *argv[]) { return rv; } +static void +exception_throw(wasm_exec_env_t exec_env) { + wasm_module_inst_t inst = wasm_runtime_get_module_inst(exec_env); + wasm_runtime_set_exception(inst, "panic"); +} + +static NativeSymbol exception_symbols[] = { + { "throw", exception_throw, "()", NULL }, +}; + static inline void -init_wamr() { +init_wamr(bool is_moontest) { RuntimeInitArgs init_args = { .mem_alloc_type = Alloc_With_Allocator, .mem_alloc_option = { .allocator = { .malloc_func = palloc, @@ -216,9 +225,14 @@ init_wamr() { if (!wasm_runtime_full_init(&init_args)) ereport(ERROR, errmsg("Failed to initialize WASM runtime")); - rustica_register_natives(); rst_register_natives_text(); rst_register_natives_stringbuilder(); + rst_register_natives_clock(); + if (is_moontest && !wasm_runtime_register_natives("exception", + exception_symbols, + sizeof(exception_symbols) + / sizeof(NativeSymbol))) + ereport(ERROR, errmsg("Failed to register exception natives")); } static inline uint8_t * @@ -342,6 +356,7 @@ run_moontest(wasm_exec_env_t exec_env, va_list args) { static void run_wasm_with(const char *wasm_file, bool use_aot, + bool is_moontest, void (*fn)(wasm_exec_env_t, va_list), ...) { bool wamr_inited = false; @@ -354,7 +369,7 @@ run_wasm_with(const char *wasm_file, PG_TRY(); { - init_wamr(); + init_wamr(is_moontest); wamr_inited = true; buffer = read_wasm_file(wasm_file, &size); if (use_aot) { diff --git a/src/rustica/moontest.c b/src/rustica/moontest.c index 2a34473..54e9900 100644 --- a/src/rustica/moontest.c +++ b/src/rustica/moontest.c @@ -8,7 +8,6 @@ #include "utils/builtins.h" #include "utils/json.h" -#include "rustica/env.h" #include "rustica/datatypes.h" #include "rustica/moontest.h" From edac3b6cfe1dc1cacceea3ef6a25eef436bfe663 Mon Sep 17 00:00:00 2001 From: Xenia Lu Date: Sat, 18 Oct 2025 09:19:36 +0800 Subject: [PATCH 4/5] clang-format --- src/rustica/adt/clock.c | 4 +++- src/rustica/adt/stringbuilder.c | 8 +++++--- src/rustica/adt/text.c | 36 ++++++++++++++++++++++----------- src/rustica/bh_log_to_pg.c | 8 ++++++-- src/rustica/main.c | 9 +++++---- src/rustica/moontest.c | 7 +++++-- 6 files changed, 48 insertions(+), 24 deletions(-) diff --git a/src/rustica/adt/clock.c b/src/rustica/adt/clock.c index 70070fd..94071f8 100644 --- a/src/rustica/adt/clock.c +++ b/src/rustica/adt/clock.c @@ -29,7 +29,9 @@ monotonic_nanos_since(wasm_exec_env_t exec_env, wasm_obj_t ref) { } static NativeSymbol clock_natives[] = { - { "realtime_micros_since_unix_epoch", realtime_micros_since_unix_epoch, "()I" }, + { "realtime_micros_since_unix_epoch", + realtime_micros_since_unix_epoch, + "()I" }, { "monotonic_now", monotonic_now, "()r" }, { "monotonic_nanos_since", monotonic_nanos_since, "(r)I" }, }; diff --git a/src/rustica/adt/stringbuilder.c b/src/rustica/adt/stringbuilder.c index 6c73c23..3b9c695 100644 --- a/src/rustica/adt/stringbuilder.c +++ b/src/rustica/adt/stringbuilder.c @@ -28,13 +28,14 @@ sb_read_text(wasm_exec_env_t exec_env, wasm_obj_t ref) { Datum txt_datum = wasm_externref_obj_get_datum(ref, TEXTOID); text *txt = DatumGetTextPP(txt_datum); char *start = VARDATA_ANY(txt); - obj_t obj = rst_obj_new(exec_env, OBJ_STRING_INFO, ref, sizeof(StringInfoData)); + obj_t obj = + rst_obj_new(exec_env, OBJ_STRING_INFO, ref, sizeof(StringInfoData)); if (txt_datum != PointerGetDatum(txt)) obj->flags |= OBJ_OWNS_BODY_MEMBERS; obj->body.sb->data = (char *)txt; obj->body.sb->cursor = start - (char *)txt; obj->body.sb->len = VARSIZE_ANY_EXHDR(txt) + obj->body.sb->cursor; - obj->body.sb->maxlen = 0; // read-only + obj->body.sb->maxlen = 0; // read-only return rst_externref_of_obj(exec_env, obj); } @@ -140,7 +141,8 @@ sb_read_char(wasm_exec_env_t exec_env, wasm_obj_t refobj) { if (ch > 0 && sb->cursor + ch <= sb->len) { pg_mb2wchar_with_len(sb->data + sb->cursor, rv, ch); sb->cursor += ch; - } else { + } + else { // Invalid UTF-8 sequence, skip one byte sb->cursor += 1; } diff --git a/src/rustica/adt/text.c b/src/rustica/adt/text.c index 17099a6..b86ef19 100644 --- a/src/rustica/adt/text.c +++ b/src/rustica/adt/text.c @@ -150,7 +150,7 @@ utf16_count_code_units(const char *mbstr, int size) { else if (mb_offset + ch > size) break; // Incomplete character at end of string if (ch < 4) - count += 1; // Normal character + count += 1; // Normal character else count += 2; // Surrogate pair mb_offset += ch; @@ -167,7 +167,8 @@ utf16_length(wasm_exec_env_t exec_env, wasm_obj_t obj) { int count; if (pg_database_encoding_max_length() == 1) { count = size; - } else { + } + else { count = utf16_count_code_units(data, size); } RST_FREE_IF_COPY(t, str); @@ -188,7 +189,8 @@ utf16_char_code_at(wasm_exec_env_t exec_env, wasm_obj_t ref, int32_t index) { if (index < 0 || index >= size) goto oob_error; rv[0] = (unsigned char)data[index]; - } else { + } + else { int mb_offset = 0; int utf16_index = 0; int32_t utf16_size = utf16_count_code_units(data, size); @@ -233,12 +235,15 @@ utf16_char_code_at(wasm_exec_env_t exec_env, wasm_obj_t ref, int32_t index) { } static wasm_externref_obj_t -utf16_from_char_code_array(wasm_exec_env_t exec_env, wasm_obj_t obj, int32_t start, int32_t length) { +utf16_from_char_code_array(wasm_exec_env_t exec_env, + wasm_obj_t obj, + int32_t start, + int32_t length) { wasm_array_obj_t arr = (wasm_array_obj_t)obj; uint32_t size = wasm_array_obj_length(arr); StringInfoData buf; wasm_externref_obj_t rv; - const char* err = "fromCharCodeArray failed"; + const char *err = "fromCharCodeArray failed"; if (length < 0) length = size; // To the end @@ -246,11 +251,14 @@ utf16_from_char_code_array(wasm_exec_env_t exec_env, wasm_obj_t obj, int32_t sta start += size; if (start < 0 || start > size) ereport(ERROR, - (errcode(ERRCODE_SUBSTRING_ERROR), errmsg("start index out of range"))); + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("start index out of range"))); if (start + length > size) length = size - start; // Adjust to fit if (length == 0) - return rst_externref_of_owned_datum(exec_env, CStringGetTextDatum(""), TEXTOID); + return rst_externref_of_owned_datum(exec_env, + CStringGetTextDatum(""), + TEXTOID); initStringInfo(&buf); for (uint32_t i = start; i < size; i++) { wasm_value_t value; @@ -271,18 +279,22 @@ utf16_from_char_code_array(wasm_exec_env_t exec_env, wasm_obj_t obj, int32_t sta if (is_utf16_surrogate_second(value.i32)) { ch = surrogate_pair_to_codepoint(cu, value.i32); i++; // Consumed the low surrogate - } else { + } + else { err = "Invalid surrogate pair"; goto error; } - } else { + } + else { err = "Incomplete surrogate pair"; goto error; } - } else if (is_utf16_surrogate_second(cu)) { + } + else if (is_utf16_surrogate_second(cu)) { err = "Unmatched low surrogate"; goto error; - } else { + } + else { ch = cu; // Normal code unit } unicode_to_utf8(ch, utf8); @@ -302,7 +314,7 @@ static wasm_externref_obj_t utf16_from_code_point(wasm_exec_env_t exec_env, int32_t code_point) { unsigned char buf[5]; if (code_point == 0) { - obj_t obj = rst_obj_new(exec_env, OBJ_DATUM, NULL, VARHDRSZ_SHORT + 1); + obj_t obj = rst_obj_new(exec_env, OBJ_DATUM, NULL, VARHDRSZ_SHORT + 1); obj->oid = TEXTOID; SET_VARSIZE_1B(obj->body.ptr, VARHDRSZ_SHORT + 1); VARDATA_ANY(obj->body.ptr)[0] = '\0'; diff --git a/src/rustica/bh_log_to_pg.c b/src/rustica/bh_log_to_pg.c index a1548cb..19eb2e5 100644 --- a/src/rustica/bh_log_to_pg.c +++ b/src/rustica/bh_log_to_pg.c @@ -30,7 +30,11 @@ pg_log_vprintf(const char *format, va_list ap) { } void -pg_bh_log(LogLevel log_level, const char *file, int line, const char *fmt, ...) { +pg_bh_log(LogLevel log_level, + const char *file, + int line, + const char *fmt, + ...) { int elevel = LOG; switch (log_level) { case BH_LOG_LEVEL_FATAL: @@ -70,5 +74,5 @@ pg_bh_log(LogLevel log_level, const char *file, int line, const char *fmt, ...) } if (elevel >= ERROR) pg_unreachable(); - } while(0); + } while (0); } diff --git a/src/rustica/main.c b/src/rustica/main.c index 8949e18..02463ed 100644 --- a/src/rustica/main.c +++ b/src/rustica/main.c @@ -228,10 +228,11 @@ init_wamr(bool is_moontest) { rst_register_natives_text(); rst_register_natives_stringbuilder(); rst_register_natives_clock(); - if (is_moontest && !wasm_runtime_register_natives("exception", - exception_symbols, - sizeof(exception_symbols) - / sizeof(NativeSymbol))) + if (is_moontest + && !wasm_runtime_register_natives("exception", + exception_symbols, + sizeof(exception_symbols) + / sizeof(NativeSymbol))) ereport(ERROR, errmsg("Failed to register exception natives")); } diff --git a/src/rustica/moontest.c b/src/rustica/moontest.c index 54e9900..f4ec7b0 100644 --- a/src/rustica/moontest.c +++ b/src/rustica/moontest.c @@ -196,8 +196,11 @@ json_object_end_cb(void *state) { if (!filename) { Assert(parse_state->filename != NULL); - filename = cstring_into_varatt_obj(parse_state->exec_env, - parse_state->filename, strlen(parse_state->filename), TEXTOID); + filename = + cstring_into_varatt_obj(parse_state->exec_env, + parse_state->filename, + strlen(parse_state->filename), + TEXTOID); } args[0].kind = WASM_EXTERNREF; From 12f206db8b6c96d8ef0139c2c0a9c332a4b6e0c3 Mon Sep 17 00:00:00 2001 From: Xenia Lu Date: Sat, 18 Oct 2025 09:32:43 +0800 Subject: [PATCH 5/5] fix: don't include module.h? --- src/rustica/adt/stringbuilder.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rustica/adt/stringbuilder.c b/src/rustica/adt/stringbuilder.c index 3b9c695..7d65a3b 100644 --- a/src/rustica/adt/stringbuilder.c +++ b/src/rustica/adt/stringbuilder.c @@ -8,7 +8,6 @@ #include "wasm_runtime_common.h" #include "rustica/datatypes.h" -#include "rustica/module.h" static wasm_externref_obj_t sb_new(wasm_exec_env_t exec_env, int32_t size_hint) {