diff --git a/compiler/AST/build.cpp b/compiler/AST/build.cpp index 69e646d1990d..189f94bfe897 100644 --- a/compiler/AST/build.cpp +++ b/compiler/AST/build.cpp @@ -1993,6 +1993,8 @@ buildClassDefExpr(const char* name, // possible in the compiler. gatherWellKnownTypes runs too late to be of use // to us. if (strcmp("string", name) == 0) { + // If we want to change dtString to refer to string(ascii), + // this might be the place to do it. *dtString = *ct; // These fields get overwritten with `ct` by the assignment. These fields are // set to `this` by the AggregateType constructor so they should still be diff --git a/compiler/AST/symbol.cpp b/compiler/AST/symbol.cpp index ace51bc06971..83f8c412d036 100644 --- a/compiler/AST/symbol.cpp +++ b/compiler/AST/symbol.cpp @@ -561,6 +561,9 @@ GenRet VarSymbol::codegen() { if( outfile ) { // dtString immediates don't actually codegen as immediates, we just use // them for param string functionality. + + // Consider changing this isStringType to support + // utf-8 and ascii string literals if (immediate && ret.chplType != dtString) { ret.isLVPtr = GEN_VAL; if (immediate->const_kind == CONST_KIND_STRING) { @@ -3160,6 +3163,9 @@ VarSymbol *new_StringSymbol(const char *str) { gFalse); // owned = false ctor->insertAtTail(gFalse); // needToCopy = false + // Either: + // change dtString to be concrete, or + // make this new VarSymbol have a concrete string(ascii) type s = new VarSymbol(astr("_str_literal_", istr(literal_id++)), dtString); s->addFlag(FLAG_NO_AUTO_DESTROY); s->addFlag(FLAG_CONST); @@ -3386,6 +3392,8 @@ static Type* immediate_type(Immediate *imm) { switch (imm->const_kind) { case CONST_KIND_STRING: { + // We might need to have string_kind for UTF-8 and ASCII strings. 
+ // (would need to change ifa/num.h and ifa/num.cpp) if (imm->string_kind == STRING_KIND_STRING) { return dtString; } else if (imm->string_kind == STRING_KIND_C_STRING) { diff --git a/compiler/AST/type.cpp b/compiler/AST/type.cpp index c050ea89ba25..97559f1e7b08 100644 --- a/compiler/AST/type.cpp +++ b/compiler/AST/type.cpp @@ -1970,6 +1970,9 @@ bool isArrayClass(Type* type) { bool isString(Type* type) { bool retval = false; + // Pretty sure this needs to return true if isStringType + // would return true + if (AggregateType* aggr = toAggregateType(type)) retval = strcmp(aggr->symbol->name, "string") == 0; diff --git a/compiler/include/type.h b/compiler/include/type.h index 5c63d2748029..c3088067f967 100644 --- a/compiler/include/type.h +++ b/compiler/include/type.h @@ -244,7 +244,12 @@ TYPE_EXTERN PrimitiveType* dtSyncVarAuxFields; TYPE_EXTERN PrimitiveType* dtSingleVarAuxFields; // Well-known types -TYPE_EXTERN AggregateType* dtString; +TYPE_EXTERN AggregateType* dtString; // consider renaming this dtStringGeneric + +// Add: +// dtStringAscii +// dtStringUTF8 + TYPE_EXTERN AggregateType* dtArray; TYPE_EXTERN AggregateType* dtBaseArr; TYPE_EXTERN AggregateType* dtBaseDom; @@ -296,12 +301,16 @@ bool isDistImplType(Type* t); bool isSyncType(Type* t); bool isAtomicType(Type* t); bool isRefIterType(Type* t); +// isStringType +// returns true for generic version, string(ascii), string(utf-8) +// ie all instantiations of the generic version bool isSubClass(Type* type, Type* baseType); bool isDistClass(Type* type); bool isDomainClass(Type* type); bool isArrayClass(Type* type); +// ? is same as isStringType? 
bool isString(Type* type); bool isUserDefinedRecord(Type* type); diff --git a/compiler/optimizations/bulkCopyRecords.cpp b/compiler/optimizations/bulkCopyRecords.cpp index 2cc63cdbc83f..72a6438bd802 100644 --- a/compiler/optimizations/bulkCopyRecords.cpp +++ b/compiler/optimizations/bulkCopyRecords.cpp @@ -66,6 +66,8 @@ static bool isTrivialAssignment(FnSymbol* fn) // Skip this optimization for string/wide string types // (due to problems providing additional arguments for // PRIM_ASSIGN). + + // This needs to call isStringType or check for either UTF-8/ascii string if (argType == dtString) return false; diff --git a/compiler/passes/buildDefaultFunctions.cpp b/compiler/passes/buildDefaultFunctions.cpp index a90bb2a07802..82e05b6c8ee4 100644 --- a/compiler/passes/buildDefaultFunctions.cpp +++ b/compiler/passes/buildDefaultFunctions.cpp @@ -754,6 +754,7 @@ static void build_enum_cast_function(EnumType* et) { fn->addFlag(FLAG_COMPILER_GENERATED); arg1 = new ArgSymbol(INTENT_BLANK, "t", dtAny); arg1->addFlag(FLAG_TYPE_VARIABLE); + // This argument should be a generic string type arg2 = new ArgSymbol(INTENT_BLANK, "_arg2", dtString); fn->insertFormalAtTail(arg1); fn->insertFormalAtTail(arg2); @@ -1300,6 +1301,7 @@ static void buildDefaultReadWriteFunctions(AggregateType* ct) { static void buildStringCastFunction(EnumType* et) { + // Think this one should be the generic string if (function_exists("_cast", 2, dtString, et)) return; @@ -1311,6 +1313,8 @@ static void buildStringCastFunction(EnumType* et) { ArgSymbol* arg = new ArgSymbol(INTENT_BLANK, "this", et); arg->addFlag(FLAG_ARG_THIS); fn->insertFormalAtTail(arg); + // This one needs to turn into : string + // with the generic string type fn->where = new BlockStmt(new CallExpr("==", t, dtString->symbol)); for_enums(constant, et) { diff --git a/compiler/passes/checkResolved.cpp b/compiler/passes/checkResolved.cpp index 518ae6d43280..943e06cb1f82 100644 --- a/compiler/passes/checkResolved.cpp +++ 
b/compiler/passes/checkResolved.cpp @@ -276,6 +276,7 @@ static void checkExternProcs() { continue; for_formals(formal, fn) { + // This should call isStringType if (formal->typeInfo() == dtString) { if (fn->instantiatedFrom == NULL) { USR_FATAL_CONT(fn, "extern procedures should not take arguments of " diff --git a/compiler/passes/filesToAST.cpp b/compiler/passes/filesToAST.cpp index 4ae62d5808df..0dc6d1b448d1 100644 --- a/compiler/passes/filesToAST.cpp +++ b/compiler/passes/filesToAST.cpp @@ -57,6 +57,8 @@ static WellKnownType sWellKnownTypes[] = { {"BaseDom", &dtBaseDom, true}, {"BaseDist", &dtDist, true}, {"chpl_main_argument", &dtMainArgument, false} + // Can dtStringAscii be added here to + // refer to a type asciiString = string(ascii) ? }; diff --git a/compiler/passes/parallel.cpp b/compiler/passes/parallel.cpp index 32370c4de58e..30838c8c12df 100644 --- a/compiler/passes/parallel.cpp +++ b/compiler/passes/parallel.cpp @@ -877,6 +877,7 @@ static void findHeapVarsAndRefs(Map*>& defMap, !isSyncType(def->sym->type) && // Dont try to broadcast string literals, they'll get fixed in // another manner + // Should call isStringType (def->sym->type != dtString)))) { // replicate global const of primitive type INT_ASSERT(defMap.get(def->sym) && defMap.get(def->sym)->n == 1); diff --git a/compiler/resolution/functionResolution.cpp b/compiler/resolution/functionResolution.cpp index 0fc39ac72445..162addbb1a1a 100644 --- a/compiler/resolution/functionResolution.cpp +++ b/compiler/resolution/functionResolution.cpp @@ -1027,6 +1027,7 @@ resolveFormals(FnSymbol* fn) { continue; // Don't pass dtString params in by reference + // Should call isStringType if(formal->type == dtString && formal->hasFlag(FLAG_INSTANTIATED_PARAM)) continue; @@ -1242,6 +1243,7 @@ canInstantiate(Type* actualType, Type* formalType) { (is_int_type(actualType) || is_uint_type(actualType) || is_imag_type(actualType) || is_real_type(actualType) || is_complex_type(actualType))) return true; + // use 
isStringType, including possibly a generic string if (formalType == dtString && actualType==dtStringC) return true; if (formalType == dtStringC && actualType==dtStringCopy) @@ -1325,6 +1327,8 @@ static bool canParamCoerce(Type* actualType, Symbol* actualSym, Type* formalType if (fits_in_uint(get_width(formalType), var->immediate)) return true; } + + // isStringType(actualType) if (formalType == dtStringC && actualType == dtString) if (actualSym && actualSym->isImmediate()) return true; @@ -1370,6 +1374,8 @@ canCoerce(Type* actualType, Symbol* actualSym, Type* formalType, FnSymbol* fn, b } if (actualType->symbol->hasFlag(FLAG_REF)) return canDispatch(actualType->getValType(), NULL, formalType, fn, promotes); + + // isStringType(formalType) if (formalType == dtString && actualType == dtStringCopy) return true; if (formalType == dtStringC && actualType == dtStringCopy) @@ -1599,7 +1605,9 @@ computeGenericSubs(SymbolMap &subs, // foo("bar"); // and pass "bar" as a c_string instead of a string if (fn->hasFlag(FLAG_EXTERN) && (formal->type == dtAny) && + // (type == dtString) -> isStringType(type) (!formal->hasFlag(FLAG_PARAM)) && (type == dtString) && + // isStringType(alignedA...) (alignedActuals.v[i]->type == dtString) && (alignedActuals.v[i]->isImmediate())) { subs.put(formal, dtStringC->symbol); @@ -2214,6 +2222,8 @@ static bool paramWorks(Symbol* actual, Type* formalType) { return fits_in_uint(get_width(formalType), imm); } if (imm->const_kind == CONST_KIND_STRING) { + // isStringType for now, but maybe put a comment + // that it wouldn't work for UTF-16 e.g. 
if (formalType == dtStringC && actual->type == dtString) { return true; } @@ -5658,6 +5668,10 @@ preFold(Expr* expr) { is_bool_type(oldType)) && (is_int_type(newType) || is_uint_type(newType) || is_bool_type(newType) || is_enum_type(newType) || + // newType can be string(ascii) or string(utf8) + // couldn't be called with generic + // isStringType and maybe a comment about + // UTF-16 being disqualified newType == dtString || newType == dtStringC)) { VarSymbol* typevar = toVarSymbol(newType->defaultValue); EnumType* typeenum = toEnumType(newType); @@ -5669,9 +5683,12 @@ preFold(Expr* expr) { coerce_immediate(var->immediate, &coerce); result = new SymExpr(new_ImmediateSymbol(&coerce)); call->replace(result); + // isStringType } else if (newType == dtString) { // typevar will be null for dtString so we need a special // case. + + // newType -> STRING_KIND_STRING_UTF8/ASCII Immediate coerce = Immediate("", STRING_KIND_STRING); coerce_immediate(var->immediate, &coerce); result = new SymExpr(new_StringSymbol(coerce.v_string)); @@ -5699,6 +5716,7 @@ preFold(Expr* expr) { } else { INT_FATAL("unexpected case in cast_fold"); } + // isStringType but not UTF-16 } else if (oldType == dtString && newType == dtStringC) { result = new SymExpr(new_CStringSymbol(var->immediate->v_string)); call->replace(result); @@ -5708,6 +5726,7 @@ preFold(Expr* expr) { } else if (EnumSymbol* enumSym = toEnumSymbol(sym->var)) { if (SymExpr* sym = toSymExpr(call->get(1))) { Type* newType = sym->var->type; + // isStringType if (newType == dtString) { result = new SymExpr(new_StringSymbol(enumSym->name)); call->replace(result); @@ -6583,6 +6602,8 @@ postFold(Expr* expr) { if (lhs->var->isParameter() && rhs->var->isParameter()) { const char* lstr = get_string(lhs); const char* rstr = get_string(rhs); + // isStringType + // alternatively, could be !(dtStringC || dtStringCopy) if (lhs->var->type == dtString) result = new SymExpr(new_StringSymbol(astr(lstr, rstr))); else @@ -6935,6 +6956,8 @@ 
resolveExpr(Expr* expr) { // Don't try to resolve the defaultTypeConstructor for string literals // (resolution ordering issue, string literals are encountered too early // on and we don't know enough to be able to resolve them at that point) + + // isStringType, but won't be generic if (!(ct == dtString && (sym->var->isParameter() || sym->var->hasFlag(FLAG_INSTANTIATED_PARAM))) && !ct->symbol->hasFlag(FLAG_GENERIC) && @@ -7823,6 +7846,37 @@ resolve() { resolveExternVarSymbols(); + // At least by here + // resolve string(ascii) and string(utf8) types + // set dtStringAscii and dtStringUTF8 to them + + + // 2 approaches: + // 1) resolve string(ascii) and string(utf8) + // by creating calls to their type constructor + + /* + // ? is the type constructor really called string, or something else? + + CallExpr* asciiEnum = new CallExpr("chpl_get_string_ascii_enum"); + + + CallExpr* call = new CallExpr(dtString->defaultTypeConstructor, + asciiEnum); + + // insert it in the AST somewhere... + // Try putting it into the String module's init function? 
+ resolveCallAndCallee(call); + */ + + // 2) in String.chpl, make declarations like + // type asciiString = string(ascii); + // type utf8String = string(utf8); + + // at this point, you'd have say + // make new SymExpr( "string_ascii" ) + // dtStringAscii = resolveTypeAlias(se) + // --ipe does not build a mainModule if (mainModule) resolveUses(mainModule); diff --git a/compiler/resolution/generics.cpp b/compiler/resolution/generics.cpp index 728aee10dc33..7470846d62fd 100644 --- a/compiler/resolution/generics.cpp +++ b/compiler/resolution/generics.cpp @@ -466,6 +466,7 @@ renameInstantiatedType(TypeSymbol* sym, SymbolMap& subs, FnSymbol* fn) { VarSymbol* var = toVarSymbol(value); if (var && var->immediate) { Immediate* immediate = var->immediate; + // should use isStringType if (var->type == dtString || var->type == dtStringC) renameInstantiatedTypeString(sym, var); else if (immediate->const_kind == NUM_KIND_BOOL) { diff --git a/compiler/resolution/wrappers.cpp b/compiler/resolution/wrappers.cpp index 26d74268c105..08063fa3f608 100644 --- a/compiler/resolution/wrappers.cpp +++ b/compiler/resolution/wrappers.cpp @@ -668,6 +668,7 @@ void coerceActuals(FnSymbol* fn, CallInfo* info) { c2 = false; Type* actualType = actualSym->type; if (needToAddCoercion(actualType, actualSym, formalType, fn)) { + // actualType == dtString -> isStringType(actualType) if (formalType == dtStringC && actualType == dtString && actualSym->isImmediate()) { // We do this swap since we know the string is a valid literal // There also is no cast defined for string->c_string on purpose (you diff --git a/modules/internal/String.chpl b/modules/internal/String.chpl index 60a2bce80f05..d6089847e0cb 100644 --- a/modules/internal/String.chpl +++ b/modules/internal/String.chpl @@ -1249,6 +1249,12 @@ module String { } // end record string + // Part of one approach ? 
+ // proc chpl_get_string_ascii_enum() { + // return Encoding.ascii; + // } + // type string_ascii = string(Encoding.ascii); + // type string_utf8 = string(Encoding.utf8); // We'd like this to be by ref, but doing so leads to an internal // compiler error. See