Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LLVM IR] Add support for Unicode strings #9764

Merged
merged 6 commits into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ public ApplicationConfigNativeAssemblyGenerator (IDictionary<string, string> env

protected override void Construct (LlvmIrModule module)
{
module.DefaultStringGroup = "env";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is a "default string group"? Anything to link to in the commit message? (Is this why .apkdesc files needed updating?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's an internal implementation detail: it makes the string manager put strings that aren't assigned to a particular group into this one. It's done this way because all LLVM IR source files are generated separately, and we can't be sure that (local) string constant names (e.g. the former default `.str.X`, where X is a per-module counter) wouldn't clash between the different generated files. So this is a way to "namespace" strings. If we were able to keep shared state between all LLVM IR generators, it wouldn't be necessary.


MapStructures (module);

module.AddGlobalVariable ("format_tag", FORMAT_TAG, comment: $" 0x{FORMAT_TAG:x}");
Expand All @@ -211,7 +213,7 @@ protected override void Construct (LlvmIrModule module)
var envVars = new LlvmIrGlobalVariable (environmentVariables, "app_environment_variables") {
Comment = " Application environment variables array, name:value",
};
module.Add (envVars, stringGroupName: "env", stringGroupComment: " Application environment variables name:value pairs");
module.Add (envVars, stringGroupName: "env.var", stringGroupComment: " Application environment variables name:value pairs");

var sysProps = new LlvmIrGlobalVariable (systemProperties, "app_system_properties") {
Comment = " System properties defined by the application",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ void InitCompressedAssemblies (out List<LlvmIrGlobalVariable>? compressedAssembl

protected override void Construct (LlvmIrModule module)
{
module.DefaultStringGroup = "cas";

MapStructures (module);

InitCompressedAssemblies (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,8 @@ uint GetLength (string str)

protected override void Construct (LlvmIrModule module)
{
module.DefaultStringGroup = "jremap";

MapStructures (module);
List<StructureInstance<JniRemappingTypeReplacementEntry>>? typeReplacements;
List<StructureInstance<JniRemappingIndexTypeEntry>>? methodIndexTypes;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ protected LlvmIrComposer (TaskLoggingHelper log)

public LlvmIrModule Construct ()
{
var module = new LlvmIrModule (cache);
var module = new LlvmIrModule (cache, Log);
Construct (module);
module.AfterConstruction ();
constructed = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,12 +199,22 @@ void WriteStrings (GeneratorWriteContext context)
}

foreach (LlvmIrStringVariable info in group.Strings) {
string s = QuoteString ((string)info.Value, out ulong size);
string s = QuoteString (info, out ulong size);

WriteGlobalVariableStart (context, info);
if (!info.IsConstantStringLiteral) {
WriteCommentLine (context, $" '{info.Value}'");
}

WriteGlobalVariableName (context, info);

// Strings must always be local symbols, global variables will point to them
WriteVariableOptions (context, LlvmIrVariableOptions.LocalString);
context.Output.Write ('[');
context.Output.Write (size.ToString (CultureInfo.InvariantCulture));
context.Output.Write (" x i8] c");
context.Output.Write ($" x {info.IrType}] ");
if (info.IsConstantStringLiteral) {
context.Output.Write ('c');
}
context.Output.Write (s);
context.Output.Write (", align ");
context.Output.WriteLine (target.GetAggregateAlignment (1, size).ToString (CultureInfo.InvariantCulture));
Expand Down Expand Up @@ -246,23 +256,37 @@ void WriteGlobalVariables (GeneratorWriteContext context)
}
}

void WriteGlobalVariableStart (GeneratorWriteContext context, LlvmIrGlobalVariable variable)
void WriteGlobalVariableName (GeneratorWriteContext context, LlvmIrGlobalVariable variable)
{
if (!String.IsNullOrEmpty (variable.Comment)) {
WriteCommentLine (context, variable.Comment);
}
context.Output.Write ('@');
context.Output.Write (variable.Name);
context.Output.Write (" = ");
}

LlvmIrVariableOptions options = variable.Options ?? LlvmIrGlobalVariable.DefaultOptions;
/// <summary>
/// Writes the attributes described by an explicit <paramref name="options"/> instance by
/// delegating to the individual writers, in this fixed order: linkage, runtime preemption,
/// visibility, address significance, writability.
/// </summary>
/// <param name="context">Write context passed through to each of the attribute writers.</param>
/// <param name="options">Options whose individual properties are forwarded to the writers.</param>
void WriteVariableOptions (GeneratorWriteContext context, LlvmIrVariableOptions options)
{
// NOTE(review): the call order presumably matches the keyword order LLVM IR expects in a
// global variable declaration - confirm before reordering.
WriteLinkage (context, options.Linkage);
WritePreemptionSpecifier (context, options.RuntimePreemption);
WriteVisibility (context, options.Visibility);
WriteAddressSignificance (context, options.AddressSignificance);
WriteWritability (context, options.Writability);
}

/// <summary>
/// Writes the options of <paramref name="variable"/>. The options are resolved in order of
/// preference: the variable's own options, then <paramref name="defaultOptions"/>, then
/// <c>LlvmIrGlobalVariable.DefaultOptions</c>.
/// </summary>
/// <param name="context">Write context passed to the options-based overload.</param>
/// <param name="variable">Variable whose <c>Options</c> take precedence when set.</param>
/// <param name="defaultOptions">Fallback used when the variable has no options of its own.</param>
void WriteVariableOptions (GeneratorWriteContext context, LlvmIrGlobalVariable variable, LlvmIrVariableOptions? defaultOptions = null)
    => WriteVariableOptions (context, variable.Options ?? defaultOptions ?? LlvmIrGlobalVariable.DefaultOptions);

/// <summary>
/// Writes the beginning of a global variable declaration: first the variable name part, then
/// the variable's options, falling back to <c>LlvmIrGlobalVariable.DefaultOptions</c> when the
/// variable does not carry options of its own.
/// </summary>
/// <param name="context">Write context passed to the name and options writers.</param>
/// <param name="variable">Global variable whose declaration start is written.</param>
void WriteGlobalVariableStart (GeneratorWriteContext context, LlvmIrGlobalVariable variable)
{
    WriteGlobalVariableName (context, variable);

    // Resolve the effective options here and hand them to the options-based overload.
    LlvmIrVariableOptions effectiveOptions = variable.Options ?? LlvmIrGlobalVariable.DefaultOptions;
    WriteVariableOptions (context, effectiveOptions);
}

void WriteGlobalVariable (GeneratorWriteContext context, LlvmIrGlobalVariable variable)
{
if (!context.InVariableGroup) {
Expand Down Expand Up @@ -319,13 +343,22 @@ void WriteTypeAndValue (GeneratorWriteContext context, LlvmIrVariable variable,
throw new InvalidOperationException ($"Internal error: variable '{variable.Name}'' of type {variable.Type} must not have a null value");
}

if (valueType != variable.Type && !LlvmIrModule.NameValueArrayType.IsAssignableFrom (variable.Type)) {
if (!IsValueAssignableFrom (valueType, variable) && !IsValueAssignableFrom (LlvmIrModule.NameValueArrayType, variable)) {
throw new InvalidOperationException ($"Internal error: variable type '{variable.Type}' is different to its value type, '{valueType}'");
}

WriteValue (context, valueType, variable);
}

/// <summary>
/// Checks whether <paramref name="valueType"/> is compatible with the declared type of
/// <paramref name="variable"/>. <c>string</c> and <c>StringHolder</c> are treated as
/// interchangeable; every other type uses <see cref="Type.IsAssignableFrom(Type)"/>.
/// </summary>
/// <param name="valueType">Type of the value being checked.</param>
/// <param name="variable">Variable whose <c>Type</c> is compared against.</param>
/// <returns><c>true</c> when the two types are considered compatible.</returns>
bool IsValueAssignableFrom (Type valueType, LlvmIrVariable variable)
{
    bool valueIsStringLike = valueType == typeof(string) || valueType == typeof(StringHolder);
    if (valueIsStringLike) {
        // A string-like value matches only a string-like variable, in either combination.
        return variable.Type == typeof(string) || variable.Type == typeof(StringHolder);
    }

    return valueType.IsAssignableFrom (variable.Type);
}

ulong GetAggregateValueElementCount (GeneratorWriteContext context, LlvmIrVariable variable) => GetAggregateValueElementCount (context, variable.Type, variable.Value, variable as LlvmIrGlobalVariable);

ulong GetAggregateValueElementCount (GeneratorWriteContext context, Type type, object? value, LlvmIrGlobalVariable? globalVariable = null)
Expand Down Expand Up @@ -560,7 +593,7 @@ void WriteInlineArray (GeneratorWriteContext context, byte[] bytes, bool encodeA
{
if (encodeAsASCII) {
context.Output.Write ('c');
context.Output.Write (QuoteString (bytes, bytes.Length, out _, nullTerminated: false));
context.Output.Write (QuoteUtf8String (bytes, bytes.Length, out _, nullTerminated: false));
return;
}

Expand Down Expand Up @@ -616,7 +649,7 @@ void WriteValue (GeneratorWriteContext context, StructureInstance structInstance
return;
}

WriteValue (context, smi.MemberType, value);
WriteValue (context, smi.MemberType, value, smi.Info.GetStringEncoding (context.TypeCache));
}

bool WriteNativePointerValue (GeneratorWriteContext context, StructureInstance si, StructureMemberInfo smi, object? value)
Expand Down Expand Up @@ -670,7 +703,7 @@ string ToHex (BasicType basicTypeDesc, Type type, object? value)
return $"{(basicTypeDesc.IsUnsigned ? prefixUnsigned : prefixSigned)}0x{hex}";
}

void WriteValue (GeneratorWriteContext context, Type type, object? value)
void WriteValue (GeneratorWriteContext context, Type type, object? value, LlvmIrStringEncoding stringEncoding = LlvmIrStringEncoding.UTF8)
{
if (value is LlvmIrVariable variableRef) {
context.Output.Write (variableRef.Reference);
Expand Down Expand Up @@ -710,13 +743,13 @@ void WriteValue (GeneratorWriteContext context, Type type, object? value)
return;
}

if (type == typeof(string)) {
if (type == typeof(string) || type == typeof(StringHolder)) {
if (value == null) {
context.Output.Write ("null");
return;
}

LlvmIrStringVariable sv = context.Module.LookupRequiredVariableForString ((string)value);
LlvmIrStringVariable sv = context.Module.LookupRequiredVariableForString (StringHolder.AsHolder (value, stringEncoding));
context.Output.Write (sv.Reference);
return;
}
Expand Down Expand Up @@ -775,7 +808,7 @@ void WriteStructureValue (GeneratorWriteContext context, StructureInstance? inst
string? comment = info.GetCommentFromProvider (smi, instance);
if (String.IsNullOrEmpty (comment)) {
var sb = new StringBuilder (" ");
sb.Append (MapManagedTypeToNative (smi));
sb.Append (MapManagedTypeToNative (context, smi));
sb.Append (' ');
sb.Append (smi.Info.Name);
comment = sb.ToString ();
Expand Down Expand Up @@ -1460,8 +1493,12 @@ public static string MapManagedTypeToNative (Type type)
return type.GetShortName ();
}

static string MapManagedTypeToNative (StructureMemberInfo smi)
static string MapManagedTypeToNative (GeneratorWriteContext context, StructureMemberInfo smi)
{
if (smi.Info.IsUnicodeString (context.TypeCache)) {
return "char16_t*";
}

string nativeType = MapManagedTypeToNative (smi.MemberType);
// Silly, but effective
if (nativeType[nativeType.Length - 1] == '*') {
Expand All @@ -1487,8 +1524,9 @@ static string MapManagedTypeToNative (StructureMemberInfo smi)
throw new InvalidOperationException ($"Field '{smi.Info.Name}' of structure '{info.Name}' should have a value of '{expectedType}' type, instead it had a '{value.GetType ()}'");
}

if (valueType == typeof(string)) {
return context.Module.LookupRequiredVariableForString ((string)value);
if (valueType == typeof(string) || valueType == typeof(StringHolder)) {
var encoding = smi.Info.GetStringEncoding (context.TypeCache);
return context.Module.LookupRequiredVariableForString (StringHolder.AsHolder (value, encoding));
}

return value;
Expand Down Expand Up @@ -1555,30 +1593,63 @@ public static string QuoteStringNoEscape (string s)
return $"\"{s}\"";
}

public static string QuoteString (string value, bool nullTerminated = true)
public static string QuoteString (LlvmIrStringVariable variable, out ulong stringSize, bool nullTerminated = true)
{
return QuoteString (value, out _, nullTerminated);
}
if (variable.Encoding == LlvmIrStringEncoding.UTF8) {
var value = (StringHolder)variable.Value;
if (value.Data == null) {
throw new InvalidOperationException ("Internal error: null strings not supported here, they should be handled elsewhere.");
}

public static string QuoteString (byte[] bytes)
{
return QuoteString (bytes, bytes.Length, out _, nullTerminated: false);
int byteCount = Encoding.UTF8.GetByteCount (value.Data);
var bytes = ArrayPool<byte>.Shared.Rent (byteCount);

try {
Encoding.UTF8.GetBytes (value.Data, 0, value.Data.Length, bytes, 0);
return QuoteUtf8String (bytes, byteCount, out stringSize, nullTerminated);
} finally {
ArrayPool<byte>.Shared.Return (bytes);
}
}

if (variable.Encoding == LlvmIrStringEncoding.Unicode) {
return QuoteUnicodeString (variable, out stringSize, nullTerminated);
}

throw new InvalidOperationException ($"Internal error: unsupported string encoding {variable.Encoding}");
}

public static string QuoteString (string value, out ulong stringSize, bool nullTerminated = true)
static string QuoteUnicodeString (LlvmIrStringVariable variable, out ulong stringSize, bool nullTerminated = true)
{
var encoding = Encoding.UTF8;
int byteCount = encoding.GetByteCount (value);
var bytes = ArrayPool<byte>.Shared.Rent (byteCount);
try {
encoding.GetBytes (value, 0, value.Length, bytes, 0);
return QuoteString (bytes, byteCount, out stringSize, nullTerminated);
} finally {
ArrayPool<byte>.Shared.Return (bytes);
var value = (StringHolder)variable.Value;
if (value.Data == null) {
throw new InvalidOperationException ("Internal error: null strings not supported here, they should be handled elsewhere.");
}

// Each character/lexeme is encoded as iXY u0xVXYZ + comma and a space, and on top of that we have two square brackets and a trailing nul
var sb = new StringBuilder ((value.Data.Length * 13) + 3); // rough estimate of capacity
sb.Append ('[');
for (int i = 0; i < value.Data.Length; i++) {
var ch = (short)value.Data[i];
if (i > 0) {
sb.Append (", ");
}
sb.Append ($"{variable.IrType} u0x{ch:X2}");
}

if (nullTerminated) {
if (value.Data.Length > 0) {
sb.Append (", ");
}
sb.Append ($"{variable.IrType} 0");
}
sb.Append (']');

stringSize = (ulong)value.Data.Length + (nullTerminated ? 1u : 0u);
return sb.ToString ();
}

public static string QuoteString (byte[] bytes, int byteCount, out ulong stringSize, bool nullTerminated = true)
static string QuoteUtf8String (byte[] bytes, int byteCount, out ulong stringSize, bool nullTerminated = true)
{
var sb = new StringBuilder (byteCount * 2); // rough estimate of capacity

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,8 @@ void WriteArgument (GeneratorWriteContext context, LlvmIrFunctionParameter? para
throw new InvalidOperationException ($"Internal error: value type '{value.GetType ()}' for argument {index} to function '{function.Signature.Name}' is invalid. Expected '{parameter.Type}' or compatible");
}

if (value is string str) {
context.Output.Write (context.Module.LookupRequiredVariableForString (str).Reference);
if (value is string || value is StringHolder) {
context.Output.Write (context.Module.LookupRequiredVariableForString (StringHolder.AsHolder (value)).Reference);
return;
}

Expand Down
Loading
Loading