diff --git a/compiler/AST/AggregateType.cpp b/compiler/AST/AggregateType.cpp index 841465164a5e..be7ae0084479 100644 --- a/compiler/AST/AggregateType.cpp +++ b/compiler/AST/AggregateType.cpp @@ -563,10 +563,13 @@ int AggregateType::getFieldPosition(const char* name, bool fatal) { } -Symbol* AggregateType::getField(const char* name, bool fatal) { +Symbol* AggregateType::getField(const char* name, bool fatal) const { + // non-const-this: a workaround for const issues with Vec, baseAST + AggregateType* ncThis = const_cast<AggregateType*>(this); + Vec<Type*> next, current; Vec<Type*>* next_p = &next, *current_p = &current; - current_p->set_add(this); + current_p->set_add(ncThis); while (current_p->n != 0) { forv_Vec(Type, t, *current_p) { if (AggregateType* ct = toAggregateType(t)) { @@ -598,7 +601,7 @@ Symbol* AggregateType::getField(const char* name, bool fatal) { } -Symbol* AggregateType::getField(int i) { +Symbol* AggregateType::getField(int i) const { return toDefExpr(fields.get(i))->sym; } diff --git a/compiler/AST/ForLoop.cpp b/compiler/AST/ForLoop.cpp index ece32bbbb35b..a2712597b383 100644 --- a/compiler/AST/ForLoop.cpp +++ b/compiler/AST/ForLoop.cpp @@ -22,7 +22,6 @@ #include "astutil.h" #include "AstVisitor.h" #include "build.h" -#include "codegen.h" #include "DeferStmt.h" #include diff --git a/compiler/AST/type.cpp b/compiler/AST/type.cpp index b296f37047fd..2fb660a9fb12 100644 --- a/compiler/AST/type.cpp +++ b/compiler/AST/type.cpp @@ -92,7 +92,18 @@ bool Type::isDefaultIntentConst() const { return retval; } -Symbol* Type::getField(const char* name, bool fatal) { +bool Type::isWidePtrType() const { + if (symbol->hasEitherFlag(FLAG_WIDE_REF, FLAG_WIDE_CLASS)) { + // Workaround an ugly hack in insert wide references + // which can make a wide _array record containing an "addr" record + Type* baseType = this->getField("addr")->type; + if (isReferenceType(baseType) || isClass(baseType) || baseType == dtNil) + return true; + } + return false; +} + +Symbol* Type::getField(const char* name, bool 
fatal) const { INT_FATAL(this, "getField not called on AggregateType"); return NULL; } diff --git a/compiler/codegen/codegen.cpp b/compiler/codegen/codegen.cpp index 2a7418fce119..05f72b2694c9 100644 --- a/compiler/codegen/codegen.cpp +++ b/compiler/codegen/codegen.cpp @@ -2005,29 +2005,6 @@ void codegen(void) { // --llvm-wide-opt is picky about other settings. // Check them here. if (!llvmCodegen ) USR_FATAL("--llvm-wide-opt requires --llvm"); - if ( widePointersStruct ) { - // generating global pointers of size > 64 bits is not - // possible with LLVM 3.3; it might be possible in the future. - - // If we have -fLLVMWideOpt, we must use packed wide - // pointers (because optimizations assume pointer size - // is the same - at most 64 bits - for all address spaces. - // 'multiple address space' patch series, submitted to LLVM 3.2, - // was backed out mostly for lack of testing. Perhaps the situation - // will be resolved in LLVM 3.4). - USR_FATAL("--llvm-wide-opt requires packed wide pointers; " \ - "try export CHPL_WIDE_POINTERS=node16"); - } - } - - if( widePointersStruct ) { - // OK - } else { - // While the C code generator can emit packed pointers, - // it does so only to help make sure that packed pointer code - // generation is correct. It is not a "supported configuration". - if( ! llvmCodegen ) - USR_WARN("C code generation for packed pointers not supported"); } // Set the executable name if it isn't set already. @@ -2198,6 +2175,7 @@ void codegen(void) { // just codegen the modules. 
if( llvmCodegen ) { #ifdef HAVE_LLVM + checkAdjustedDataLayout(); forv_Vec(ModuleSymbol, currentModule, allModules) { mysystem(astr("# codegen-ing module", currentModule->name), "generating comment for --print-commands option"); @@ -2276,65 +2254,18 @@ void makeBinary(void) { } } -#ifdef HAVE_LLVM -GenInfo::GenInfo( - std::string clangCcIn, - std::string clangCxxIn, - std::string compilelineIn, - std::vector clangCCArgsIn, - std::vector clangLDArgsIn, - std::vector clangOtherArgsIn, - bool parseOnlyIn ) - : cfile(NULL), cLocalDecls(), cStatements(), - lineno(-1), filename(NULL), parseOnly(parseOnlyIn), - // the rest of these are only in GenInfo with HAVE_LLVM - module(NULL), builder(NULL), lvt(NULL), - clangCC(clangCcIn), - clangCXX(clangCxxIn), - compileline(compilelineIn), - clangCCArgs(clangCCArgsIn), clangLDArgs(clangLDArgsIn), - clangOtherArgs(clangOtherArgsIn), - codegenOptions(), diagOptions(NULL), - DiagClient(NULL), - DiagID(NULL), - Diags(NULL), - Clang(NULL), clangTargetOptions(), clangLangOptions(), - moduleName("root"), llvmContext(), Ctx(NULL), - targetData(NULL), cgBuilder(NULL), cgAction(NULL), - tbaaRootNode(NULL), - targetLayout(), globalToWideInfo(), - FPM_postgen(NULL) -{ - std::string home(CHPL_HOME); - std::string rtmain = home + "/runtime/etc/rtmain.c"; - - setupClang(this, rtmain); - - // Create a new LLVM module, IRBuilder, and LayeredValueTable. - if( ! parseOnly ) { - module = new llvm::Module(moduleName, llvmContext); - builder = new llvm::IRBuilder<>(module->getContext()); - } - - lvt = new LayeredValueTable(); - - // These are initialized only after we have types - // for everything and are deciding what calls to make. - // these are set by setupClangContext from CCodeGenAction. 
- Ctx = NULL; - targetData = NULL; - cgBuilder = NULL; -} -#endif -// No LLVM GenInfo::GenInfo() : cfile(NULL), cLocalDecls(), cStatements(), - lineno(-1), filename(NULL), parseOnly(false) + lineno(-1), filename(NULL) #ifdef HAVE_LLVM - // Could set more of these to NULL, but the real - // point is to just core-dump if we end up trying - // to use them.... - , module(NULL), builder(NULL), lvt(NULL) + , + lvt(NULL), module(NULL), builder(NULL), + loopStack(), + llvmContext(), + tbaaRootNode(NULL), + globalToWideInfo(), + FPM_postgen(NULL), + clangInfo(NULL) #endif { } diff --git a/compiler/codegen/expr.cpp b/compiler/codegen/expr.cpp index 60a3b3a9ba36..c9a5d53fbc62 100644 --- a/compiler/codegen/expr.cpp +++ b/compiler/codegen/expr.cpp @@ -223,7 +223,7 @@ static llvm::Value *convertValueToType(llvm::Value *value, llvm::Type *newType, bool isSigned = false, bool force = false) { GenInfo* info = gGenInfo; - return convertValueToType(info->builder, info->targetData, value, newType, isSigned, force); + return convertValueToType(info->builder, info->module->getDataLayout(), value, newType, isSigned, force); } static @@ -235,9 +235,10 @@ PromotedPair convertValuesToLarger(llvm::Value *value1, llvm::Value *value2, boo } #endif -#define WIDE_GEP_LOC 0 -#define WIDE_GEP_ADDR 1 -#define WIDE_GEP_SIZE 2 +enum WideThingField { + WIDE_GEP_LOC=0, + WIDE_GEP_ADDR=1, +}; static const char* wide_fields[] = {"locale", "addr", "size", NULL}; @@ -287,7 +288,7 @@ GenRet codegenWideAddr(GenRet locale, GenRet raddr, Type* wideType = NULL) INT_ASSERT(wideRefType); locale = codegenValue(locale); - if( widePointersStruct ) { + if( !fLLVMWideOpt ) { // Create a stack-local stored wide pointer // of the appropriate type. ret = createTempVar(wideRefType); @@ -328,34 +329,27 @@ GenRet codegenWideAddr(GenRet locale, GenRet raddr, Type* wideType = NULL) // Load whatever we stored... 
ret = codegenValue(ret); } else { - if( fLLVMWideOpt ) { #ifdef HAVE_LLVM - GenRet wideTy = wideRefType; // get the LLVM type for the wide ref. - llvm::PointerType *addrType = llvm::cast(wideTy.type); + GenRet wideTy = wideRefType; // get the LLVM type for the wide ref. + llvm::PointerType *addrType = llvm::cast(wideTy.type); - // call GLOBAL_FN_GLOBAL_MAKE dummy function - llvm::Function* fn = getMakeFn(info->module, &info->globalToWideInfo, - addrType); - INT_ASSERT(fn); - llvm::Type* eltType = addrType->getElementType(); - llvm::Type* locAddrType = llvm::PointerType::getUnqual(eltType); - // Null pointers require us to possibly cast to the pointer type - // we are supposed to have since null has type void*. - llvm::Value* locAddr = raddr.val; - locAddr = info->builder->CreatePointerCast(locAddr, locAddrType); + // call GLOBAL_FN_GLOBAL_MAKE dummy function + llvm::Function* fn = getMakeFn(info->module, &info->globalToWideInfo, + addrType); + INT_ASSERT(fn); + llvm::Type* eltType = addrType->getElementType(); + llvm::Type* locAddrType = llvm::PointerType::getUnqual(eltType); + // Null pointers require us to possibly cast to the pointer type + // we are supposed to have since null has type void*. + llvm::Value* locAddr = raddr.val; + locAddr = info->builder->CreatePointerCast(locAddr, locAddrType); #if HAVE_LLVM_VER >= 37 - ret.val = info->builder->CreateCall(fn, {locale.val, locAddr}); + ret.val = info->builder->CreateCall(fn, {locale.val, locAddr}); #else - ret.val = info->builder->CreateCall2(fn, locale.val, locAddr); + ret.val = info->builder->CreateCall2(fn, locale.val, locAddr); #endif #endif - } else { - // Packed wide pointers. 
- ret = codegenCallExpr("chpl_return_wide_ptr_loc", - locale, codegenCastToVoidStar(raddr)); - ret = codegenCast(wideRefType, ret); - } } ret.chplType = wideRefType->getValType(); @@ -619,7 +613,7 @@ GenRet codegenWideHere(GenRet addr, Type* wideType = NULL) static bool isWide(GenRet x) { if( x.isLVPtr == GEN_WIDE_PTR ) return true; - if( x.chplType && x.chplType->symbol->hasEitherFlag(FLAG_WIDE_REF,FLAG_WIDE_CLASS) ) return true; + if( x.chplType && x.chplType->isWidePtrType() ) return true; return false; } @@ -657,6 +651,7 @@ Type* getRefTypesForWideThing(GenRet wide, Type** wideRefTypeOut) // This function casts a wide pointer to a void* wide pointer (ie wide_ptr_t) // for use with packed wide pointers. +/* static GenRet codegenCastWideToVoid(GenRet wide) { INT_ASSERT(wide.isLVPtr == GEN_WIDE_PTR || @@ -679,6 +674,7 @@ static GenRet codegenCastWideToVoid(GenRet wide) { return codegenCast("wide_ptr_t", wide); } +*/ // Extract a field of a wide string/ptr, returning an lvalue-pointer to the that // field if we have a pointer to the wide string/ptr. We need this function @@ -690,19 +686,16 @@ static GenRet codegenCastWideToVoid(GenRet wide) { // // Works for wide strings or wide pointers. // -// field is WIDE_GEP_LOC, WIDE_GEP_ADDR, or WIDE_GEP_SIZE. 
-static GenRet codegenWideThingField(GenRet ws, int field) +// field is WIDE_GEP_LOC, WIDE_GEP_ADDR +static GenRet codegenWideThingField(GenRet ws, WideThingField field) { GenRet ret; GenInfo* info = gGenInfo; INT_ASSERT(field == WIDE_GEP_LOC || - field == WIDE_GEP_ADDR || - field == WIDE_GEP_SIZE ); + field == WIDE_GEP_ADDR); - if( field == WIDE_GEP_SIZE ) { - ret.chplType = SIZE_TYPE; - } else if( field == WIDE_GEP_LOC ) { + if( field == WIDE_GEP_LOC ) { ret.chplType = LOCALE_ID_TYPE; } else if( field == WIDE_GEP_ADDR ) { // get the local reference type @@ -726,18 +719,36 @@ static GenRet codegenWideThingField(GenRet ws, int field) } } else { #ifdef HAVE_LLVM - if (ws.val->getType()->isPointerTy()){ - ret.isLVPtr = GEN_PTR; - ret.val = info->builder->CreateConstInBoundsGEP2_32( + if ( !fLLVMWideOpt ) { + if (ws.val->getType()->isPointerTy()){ + ret.isLVPtr = GEN_PTR; + ret.val = info->builder->CreateConstInBoundsGEP2_32( #if HAVE_LLVM_VER >= 37 - NULL, + NULL, #endif - ws.val, 0, field); + ws.val, 0, field); + } else { + ret.isLVPtr = GEN_VAL; + ret.val = info->builder->CreateExtractValue(ws.val, field); + } + assert(ret.val); } else { - ret.isLVPtr = GEN_VAL; - ret.val = info->builder->CreateExtractValue(ws.val, field); + + // Workaround: for LLVMWideOpt, get pointers to parts + // of addresses, but only support that when they are rvalues. + + // TODO: replace this code with an assert. + + // It would probably be better to fix InsertWideReferences. + // The problematic pattern comes up when the optimization + // local _array -> local _array._instance fires in the + // array's deinit/_do_destroy code. 
+ if( field == WIDE_GEP_LOC ) { + ret = createTempVarWith(codegenRlocale(ws)); + } else if( field == WIDE_GEP_ADDR ) { + ret = createTempVarWith(codegenRaddr(ws)); + } } - assert(ret.val); #endif } @@ -755,27 +766,21 @@ GenRet codegenRaddr(GenRet wide) type = getRefTypesForWideThing(wide, &wideRefType); - if( widePointersStruct ) { + if( !fLLVMWideOpt ) { ret = codegenValue(codegenWideThingField(wide, WIDE_GEP_ADDR)); } else { - if( fLLVMWideOpt ) { #ifdef HAVE_LLVM - GenInfo* info = gGenInfo; - if (wide.isLVPtr == GEN_PTR) wide = codegenValue(wide); - GenRet wideTy = wideRefType; // get the LLVM type for the wide ref. - llvm::PointerType *addrType = llvm::cast(wideTy.type); - - // call GLOBAL_FN_GLOBAL_ADDR dummy function - llvm::Function* fn = getAddrFn(info->module, &info->globalToWideInfo, - addrType); - INT_ASSERT(fn); - ret.val = info->builder->CreateCall(fn, wide.val); + GenInfo* info = gGenInfo; + if (wide.isLVPtr == GEN_PTR) wide = codegenValue(wide); + GenRet wideTy = wideRefType; // get the LLVM type for the wide ref. + llvm::PointerType *addrType = llvm::cast(wideTy.type); + + // call GLOBAL_FN_GLOBAL_ADDR dummy function + llvm::Function* fn = getAddrFn(info->module, &info->globalToWideInfo, + addrType); + INT_ASSERT(fn); + ret.val = info->builder->CreateCall(fn, wide.val); #endif - } else { - // Packed wide pointers - ret = codegenCallExpr("chpl_wide_ptr_get_address", - codegenCastWideToVoid(wide)); - } ret = codegenCast(type, ret); } ret.chplType = type; @@ -788,38 +793,22 @@ static GenRet codegenRlocale(GenRet wide) GenRet ret; Type* type = LOCALE_ID_TYPE; - if( widePointersStruct ) { + if( !fLLVMWideOpt ) { ret = codegenWideThingField(wide, WIDE_GEP_LOC); } else { - if( fLLVMWideOpt ) { -#ifdef HAVE_LLVM - Type* wideRefType = NULL; - GenInfo* info = gGenInfo; - getRefTypesForWideThing(wide, &wideRefType); - if (wide.isLVPtr == GEN_PTR) wide = codegenValue(wide); - GenRet wideTy = wideRefType; // get the LLVM type for the wide ref. 
- llvm::PointerType *addrType = llvm::cast(wideTy.type); - - // call GLOBAL_FN_GLOBAL_LOCID dummy function - llvm::Function* fn = getLocFn(info->module, &info->globalToWideInfo, addrType); - INT_ASSERT(fn); - ret.val = info->builder->CreateCall(fn, wide.val); -#endif - } else { - - // Packed wide pointers - ret = codegenCallExpr("chpl_wide_ptr_get_localeID", - codegenCastWideToVoid(wide)); #ifdef HAVE_LLVM - GenInfo* info = gGenInfo; - if (!info->cfile) { - assert(ret.val); - GenRet expectType = LOCALE_ID_TYPE; - ret.val = convertValueToType(ret.val, expectType.type); - assert(ret.val); - } + Type* wideRefType = NULL; + GenInfo* info = gGenInfo; + getRefTypesForWideThing(wide, &wideRefType); + if (wide.isLVPtr == GEN_PTR) wide = codegenValue(wide); + GenRet wideTy = wideRefType; // get the LLVM type for the wide ref. + llvm::PointerType *addrType = llvm::cast(wideTy.type); + + // call GLOBAL_FN_GLOBAL_LOCID dummy function + llvm::Function* fn = getLocFn(info->module, &info->globalToWideInfo, addrType); + INT_ASSERT(fn); + ret.val = info->builder->CreateCall(fn, wide.val); #endif - } } ret.chplType = type; return ret; @@ -829,31 +818,25 @@ static GenRet codegenRnode(GenRet wide){ GenRet ret; Type* type = NODE_ID_TYPE; - if( widePointersStruct ) { + if( !fLLVMWideOpt ) { ret = codegenCallExpr("chpl_nodeFromLocaleID", codegenAddrOf(codegenValuePtr( codegenWideThingField(wide, WIDE_GEP_LOC))), /*ln*/codegenZero(), /*fn*/codegenZero32()); } else { - if( fLLVMWideOpt ) { #ifdef HAVE_LLVM - Type* wideRefType = NULL; - GenInfo* info = gGenInfo; - getRefTypesForWideThing(wide, &wideRefType); - if (wide.isLVPtr == GEN_PTR) wide = codegenValue(wide); - GenRet wideTy = wideRefType; // get the LLVM type for the wide ref. 
- llvm::PointerType *addrType = llvm::cast(wideTy.type); - - // call GLOBAL_FN_GLOBAL_NODEID dummy function - llvm::Function* fn = getNodeFn(info->module, &info->globalToWideInfo, addrType); - INT_ASSERT(fn); - ret.val = info->builder->CreateCall(fn, wide.val); + Type* wideRefType = NULL; + GenInfo* info = gGenInfo; + getRefTypesForWideThing(wide, &wideRefType); + if (wide.isLVPtr == GEN_PTR) wide = codegenValue(wide); + GenRet wideTy = wideRefType; // get the LLVM type for the wide ref. + llvm::PointerType *addrType = llvm::cast(wideTy.type); + + // call GLOBAL_FN_GLOBAL_NODEID dummy function + llvm::Function* fn = getNodeFn(info->module, &info->globalToWideInfo, addrType); + INT_ASSERT(fn); + ret.val = info->builder->CreateCall(fn, wide.val); #endif - } else { - // Packed wide pointers - ret = codegenCallExpr("chpl_wide_ptr_get_node", - codegenCastWideToVoid(wide)); - } } ret.chplType = type; @@ -2167,7 +2150,7 @@ void convertArgumentForCall(llvm::FunctionType *fnType, int8_type = llvm::Type::getInt8Ty(info->llvmContext); int8_ptr_type = int8_type->getPointerTo(); - arg_size = getTypeSizeInBytes(info->targetData, t); + arg_size = getTypeSizeInBytes(info->module->getDataLayout(), t); assert(arg_size >= 0); // Allocate space on the stack... 
@@ -2183,7 +2166,7 @@ void convertArgumentForCall(llvm::FunctionType *fnType, } targetType = fnType->getParamType(outArgs.size()); dst_ptr_type = targetType->getPointerTo(); - cur_size = getTypeSizeInBytes(info->targetData, targetType); + cur_size = getTypeSizeInBytes(info->module->getDataLayout(), targetType); assert(cur_size > 0); @@ -2201,7 +2184,7 @@ void convertArgumentForCall(llvm::FunctionType *fnType, outArgs.push_back(cur); //printf("offset was %i\n", (int) offset); - offset = getTypeFieldNext(info->targetData, t, offset + cur_size - 1); + offset = getTypeFieldNext(info->module->getDataLayout(), t, offset + cur_size - 1); //printf("offset now %i\n", (int) offset); } } else { @@ -2844,7 +2827,7 @@ void codegenCopy(GenRet dest, GenRet src, Type* chplType=NULL) if( chplType && chplType->symbol->hasFlag(FLAG_STAR_TUPLE) ) { // Always use memcpy for star tuples. useMemcpy = true; - } else if( isTypeSizeSmallerThan(info->targetData, eltTy, + } else if( isTypeSizeSmallerThan(info->module->getDataLayout(), eltTy, 256 /* max bytes to load/store */)) { // OK } else { @@ -4458,6 +4441,11 @@ GenRet CallExpr::codegenPrimitive() { // (instead of just using the node number) GenRet lc = codegenLocaleForNode(locale); + remoteAddr = codegenWideAddr(lc, remoteAddr); + + if (remoteAddr.isLVPtr == GEN_WIDE_PTR) + remoteAddr = codegenAddrOf(remoteAddr); + if (localAddr.isLVPtr == GEN_PTR) localAddr = codegenAddrOf(localAddr); @@ -4466,11 +4454,11 @@ GenRet CallExpr::codegenPrimitive() { if (isget) { codegenCallMemcpy(localAddr, - codegenAddrOf(codegenWideAddr(lc, remoteAddr)), + remoteAddr, size, NULL); } else { - codegenCallMemcpy(codegenAddrOf(codegenWideAddr(lc, remoteAddr)), + codegenCallMemcpy(remoteAddr, localAddr, size, NULL); diff --git a/compiler/codegen/symbol.cpp b/compiler/codegen/symbol.cpp index da85556ca072..7d6e04d44aef 100644 --- a/compiler/codegen/symbol.cpp +++ b/compiler/codegen/symbol.cpp @@ -68,6 +68,7 @@ const char* llvmStageName[llvmStageNum::LAST] = { 
"none", //llvmStageNum::NONE "basic", //llvmStageNum::BASIC "full", //llvmStageNum::FULL + "every", //llvmStageNum::EVERY "early-as-possible", "module-optimizer-early", "loop-optimizer-end", @@ -580,15 +581,11 @@ void VarSymbol::codegenDefC(bool global, bool isHeader) { } else if (ct->symbol->hasFlag(FLAG_WIDE_REF) || ct->symbol->hasFlag(FLAG_WIDE_CLASS)) { if (isFnSymbol(defPoint->parentSymbol)) { - if (widePointersStruct) { - // - // CHPL_LOCALEID_T_INIT is #defined in the chpl-locale-model.h - // file in the runtime, for the selected locale model. - // - str += " = {CHPL_LOCALEID_T_INIT, NULL}"; - } else { - str += " = ((wide_ptr_t) NULL)"; - } + // + // CHPL_LOCALEID_T_INIT is #defined in the chpl-locale-model.h + // file in the runtime, for the selected locale model. + // + str += " = {CHPL_LOCALEID_T_INIT, NULL}"; } } } @@ -1287,6 +1284,8 @@ void FnSymbol::codegenDef() { func->addFnAttr(llvm::Attribute::NoInline); llvmPrintIrCName = cname; } + if (fNoInline) + func->addFnAttr(llvm::Attribute::NoInline); llvm::BasicBlock *block = llvm::BasicBlock::Create(info->module->getContext(), "entry", func); @@ -1368,7 +1367,8 @@ void FnSymbol::codegenDef() { } } - if(llvmPrintIrStageNum == llvmStageNum::NONE + if((llvmPrintIrStageNum == llvmStageNum::NONE || + llvmPrintIrStageNum == llvmStageNum::EVERY) && strcmp(llvmPrintIrName, name) == 0) printLlvmIr(func, llvmStageNum::NONE); @@ -1380,7 +1380,8 @@ void FnSymbol::codegenDef() { // (note, in particular, the default pass manager's // populateFunctionPassManager does not include vectorization) info->FPM_postgen->run(*func); - if(llvmPrintIrStageNum == llvmStageNum::BASIC + if((llvmPrintIrStageNum == llvmStageNum::BASIC || + llvmPrintIrStageNum == llvmStageNum::EVERY) && strcmp(llvmPrintIrName, name) == 0) printLlvmIr(func, llvmStageNum::BASIC); #endif diff --git a/compiler/codegen/type.cpp b/compiler/codegen/type.cpp index 91888a5d393f..18a522eb4465 100644 --- a/compiler/codegen/type.cpp +++ b/compiler/codegen/type.cpp 
@@ -259,60 +259,46 @@ void AggregateType::codegenDef() { } } else { if( outfile ) { - if( symbol->hasEitherFlag(FLAG_WIDE_REF, FLAG_WIDE_CLASS) && - (! widePointersStruct ) ) { - // Reach this branch when generating a wide/wide class as a - // global pointer! - Type* baseType = this->getField("addr")->type; - GenRet c = baseType; - - // could use __attribute__(address_space(101)) - // if we wanted to emit packed pointers in a different AS with clang. - fprintf(outfile, "typedef %s * %s;\n", - baseType->symbol->codegen().c.c_str(), - this->classStructName(true)); - } else { - fprintf(outfile, "typedef struct %s", this->classStructName(false)); - if (aggregateTag == AGGREGATE_CLASS && dispatchParents.n > 0) { - /* Add a comment to class definitions listing super classes */ - bool first = true; - fprintf(outfile, " /* : "); - forv_Vec(Type, parent, dispatchParents) { - if (parent) { - if (!first) { - fprintf(outfile, ", "); - } - fprintf(outfile, "%s", parent->symbol->codegen().c.c_str()); - first = false; + fprintf(outfile, "typedef struct %s", this->classStructName(false)); + if (aggregateTag == AGGREGATE_CLASS && dispatchParents.n > 0) { + /* Add a comment to class definitions listing super classes */ + bool first = true; + fprintf(outfile, " /* : "); + forv_Vec(Type, parent, dispatchParents) { + if (parent) { + if (!first) { + fprintf(outfile, ", "); } + fprintf(outfile, "%s", parent->symbol->codegen().c.c_str()); + first = false; } - fprintf(outfile, " */"); - } - fprintf(outfile, " {\n"); - if (symbol->hasFlag(FLAG_OBJECT_CLASS) && aggregateTag == AGGREGATE_CLASS) { - fprintf(outfile, "chpl__class_id chpl__cid;\n"); - } else if (aggregateTag == AGGREGATE_UNION) { - fprintf(outfile, "int64_t _uid;\n"); - if (this->fields.length != 0) - fprintf(outfile, "union {\n"); - } else if (this->fields.length == 0) { - // TODO: remove and enforce at least 1 element in a union - fprintf(outfile, "uint8_t dummyFieldToAvoidWarning;\n"); } + fprintf(outfile, " */"); + } + 
fprintf(outfile, " {\n"); + if (symbol->hasFlag(FLAG_OBJECT_CLASS) && aggregateTag == AGGREGATE_CLASS) { + fprintf(outfile, "chpl__class_id chpl__cid;\n"); + } else if (aggregateTag == AGGREGATE_UNION) { + fprintf(outfile, "int64_t _uid;\n"); + if (this->fields.length != 0) + fprintf(outfile, "union {\n"); + } else if (this->fields.length == 0) { + // TODO: remove and enforce at least 1 element in a union + fprintf(outfile, "uint8_t dummyFieldToAvoidWarning;\n"); + } - if (this->fields.length != 0) { - for_fields(field, this) { - field->codegenDef(); - } + if (this->fields.length != 0) { + for_fields(field, this) { + field->codegenDef(); } - flushStatements(); + } + flushStatements(); - if (aggregateTag == AGGREGATE_UNION) { - if (this->fields.length != 0) - fprintf(outfile, "} _u;\n"); - } - fprintf(outfile, "} %s;\n\n", this->classStructName(true)); + if (aggregateTag == AGGREGATE_UNION) { + if (this->fields.length != 0) + fprintf(outfile, "} _u;\n"); } + fprintf(outfile, "} %s;\n\n", this->classStructName(true)); } else { #ifdef HAVE_LLVM int paramID = 0; @@ -336,7 +322,7 @@ void AggregateType::codegenDef() { for_fields(field, this) { llvm::Type* fieldType = field->type->symbol->codegen().type; INT_ASSERT(fieldType); - uint64_t fieldSize = info->targetData->getTypeStoreSize(fieldType); + uint64_t fieldSize = info->module->getDataLayout().getTypeStoreSize(fieldType); if(fieldSize > largestSize) { largestType = fieldType; @@ -383,14 +369,12 @@ void AggregateType::codegenDef() { // Is it a class or a record? // if it's a record, we make the new type now. // if it's a class, we update the existing type. - if( symbol->hasEitherFlag(FLAG_WIDE_REF, FLAG_WIDE_CLASS) && - (! widePointersStruct ) ) { + if( this->isWidePtrType() && fLLVMWideOpt ) { // Reach this branch when generating a wide/wide class as a // global pointer! 
unsigned globalAddressSpace = 0; - if( fLLVMWideOpt ) - globalAddressSpace = info->globalToWideInfo.globalSpace; + globalAddressSpace = info->globalToWideInfo.globalSpace; Type* baseType = this->getField("addr")->type; llvm::Type* llBaseType = baseType->symbol->codegen().type; diff --git a/compiler/include/AggregateType.h b/compiler/include/AggregateType.h index eb1de56946b9..f7c696b8bcdb 100644 --- a/compiler/include/AggregateType.h +++ b/compiler/include/AggregateType.h @@ -94,8 +94,8 @@ class AggregateType : public Type { int getFieldPosition(const char* name, bool fatal = true); - Symbol* getField(const char* name, bool fatal = true); - Symbol* getField(int i); + Symbol* getField(const char* name, bool fatal = true) const; + Symbol* getField(int i) const; // e is as used in PRIM_GET_MEMBER/PRIM_GET_SVEC_MEMBER QualifiedType getFieldType(Expr* e); diff --git a/compiler/include/clangUtil.h b/compiler/include/clangUtil.h index 3bfa7e9fff9d..cebe345e79f9 100644 --- a/compiler/include/clangUtil.h +++ b/compiler/include/clangUtil.h @@ -141,6 +141,8 @@ bool lookupInExternBlock(ModuleSymbol* module, const char* name, bool alreadyConvertedExtern(ModuleSymbol* module, const char* name); bool setAlreadyConvertedExtern(ModuleSymbol* module, const char* name); +void checkAdjustedDataLayout(); + extern fileinfo gAllExternCode; extern fileinfo gChplCompilationConfig; diff --git a/compiler/include/codegen.h b/compiler/include/codegen.h index 60764f352d15..591ca3f58e4d 100644 --- a/compiler/include/codegen.h +++ b/compiler/include/codegen.h @@ -26,6 +26,7 @@ #include #ifdef HAVE_LLVM +// TODO -- forward declare more LLVM things #include "clangUtil.h" #include "llvmGlobalToWide.h" #endif @@ -36,12 +37,7 @@ #ifdef HAVE_LLVM // forward declare. 
-namespace clang { - namespace CodeGen { - class CodeGenModule; - } -} -class CCodeGenAction; +class ClangInfo; #endif @@ -84,54 +80,20 @@ struct GenInfo { int lineno; const char* filename; - bool parseOnly; #ifdef HAVE_LLVM - // If we're generating LLVM, the following are available + // stores parsed C stuff for extern blocks + LayeredValueTable *lvt; + + // Once we get to code generation.... llvm::Module *module; llvm::IRBuilder<> *builder; - LayeredValueTable *lvt; + llvm::TargetMachine* targetMachine; std::stack loopStack; - // Clang Stuff - std::string clangCC; - std::string clangCXX; - std::string compileline; - std::vector clangCCArgs; - std::vector clangLDArgs; - std::vector clangOtherArgs; - - clang::CodeGenOptions codegenOptions; - llvm::IntrusiveRefCntPtr diagOptions; - clang::TextDiagnosticPrinter* DiagClient; - llvm::IntrusiveRefCntPtr DiagID; - llvm::IntrusiveRefCntPtr Diags; - - clang::CompilerInstance *Clang; - // We get these out of the compiler instance - // before delete'ing it. - clang::TargetOptions clangTargetOptions; - clang::LangOptions clangLangOptions; - - // Once we get to code generation.... - std::string moduleName; llvm::LLVMContext llvmContext; - clang::ASTContext *Ctx; - - // After 3.3 this is llvm::DataLayout - LLVM_TARGET_DATA *targetData; - clang::CodeGen::CodeGenModule *cgBuilder; - CCodeGenAction *cgAction; - llvm::MDNode* tbaaRootNode; - // We stash the layout that Clang would like to use here. - // With fLLVMWideOpt, this will be the layout that we - // pass to the code generator even though we modify the - // version in the module (to add global pointer types) - // before running optimization. - std::string targetLayout; - // Information used to generate code with fLLVMWideOpt. Instead of // generating wide pointers with puts and gets, we generate // address space 100 (e.g.) 
pointers and use loads, stores, or memcpy, @@ -141,23 +103,12 @@ struct GenInfo { GlobalToWideInfo globalToWideInfo; // Optimizations to apply immediately after code-generating a fn - LEGACY_FUNCTION_PASS_MANAGER* FPM_postgen; - - // When using a function, just use cgModule->GetAddrOfFunction, - // which will cause cgModule to emit it on Builder->Release. - // - // - // defined in passes/codegen.cpp - GenInfo(std::string clangCC, - std::string clangCXX, - std::string compilelineIn, - std::vector clangCCArgs, - std::vector clangLDArgs, - std::vector clangOtherArgs, - bool parseOnly); + llvm::legacy::FunctionPassManager* FPM_postgen; + + ClangInfo* clangInfo; #endif - GenInfo(); + GenInfo(); }; diff --git a/compiler/include/driver.h b/compiler/include/driver.h index f4bed5b58305..ff02046de794 100644 --- a/compiler/include/driver.h +++ b/compiler/include/driver.h @@ -110,12 +110,6 @@ extern const char* CHPL_UNWIND; extern bool printPasses; extern FILE* printPassesFile; -// Set true if CHPL_WIDE_POINTERS==struct. -// In that case, the code generator emits structures -// for wide pointers. Otherwise, wide pointers are -// packed into a wide pointer type. -extern bool widePointersStruct; - extern char fExplainCall[256]; extern int explainCallID; extern int breakOnResolveID; diff --git a/compiler/include/llvmGlobalToWide.h b/compiler/include/llvmGlobalToWide.h index 25c47c770a8a..0cbb8c601fde 100644 --- a/compiler/include/llvmGlobalToWide.h +++ b/compiler/include/llvmGlobalToWide.h @@ -109,7 +109,6 @@ struct GlobalPointerInfo { globalToWideFn(NULL), wideToGlobalFn(NULL) { } }; -#define GLOBAL_PTR_BITS 64 #define GLOBAL_TYPE ".gt." #define GLOBAL_FN ".gf." #define GLOBAL_FN_GLOBAL_ADDR ".gf.addr." @@ -120,40 +119,37 @@ struct GlobalPointerInfo { #define GLOBAL_FN_WIDE_TO_GLOBAL ".gf.w2g." 
typedef llvm::DenseMap globalTypes_t; -typedef llvm::SmallPtrSet specialFunctions_t; +typedef std::vector specialFunctions_t; typedef llvm::TrackingVH runtime_fn_t; struct GlobalToWideInfo { unsigned globalSpace; unsigned wideSpace; + + unsigned globalPtrBits; + // this optimization currently assumes wide pointers are + // stored in a 128-bit struct representation that contains + // locale-id + // node + // addr + llvm::Type* localeIdType; llvm::Type* nodeIdType; + globalTypes_t gTypes; specialFunctions_t specialFunctions; - // args: packed wide ptr. Returns the address portion. - runtime_fn_t addrFn; - // args: packed wide ptr, &locale. sets locale = wide.locale. - // It has the complicated signature in order to keep - // arguments passed by pointer (vs structure) to avoid - // some issues with passing/returning structures. - runtime_fn_t locFn; - // args: packed wide ptr. Returns the node number portion. - runtime_fn_t nodeFn; - // args: const locale*, address. Returns a packed wide pointer (void*). 
- runtime_fn_t makeFn; - - // args: dst local address, wide address {locale,i8*}, num bytes, atomicness + // args: dst local address, src nodeid, src address, num bytes, atomicness runtime_fn_t getFn; - // args: dst wide address {locale,i8*}, local address, num bytes, atomicness + // args: dst nodeid, dst address, local address, num bytes, atomicness runtime_fn_t putFn; - // args: dst wide address {locale,i8*}; - // src wide address {locale,i8*}, + // args: dst nodeid, dst address + // src nodeid, src address // num bytes runtime_fn_t getPutFn; - // args: dst wide address {locale,i8*}, c (byte), num bytes + // args: dst nodeid, dst addr, c (byte), num bytes runtime_fn_t memsetFn; // Dummy function storing the runtime dependencies @@ -163,10 +159,12 @@ struct GlobalToWideInfo { runtime_fn_t preservingFn; GlobalToWideInfo() - : globalSpace(0), wideSpace(0), localeIdType(NULL), nodeIdType(NULL), gTypes(), specialFunctions() { } + : globalSpace(0), wideSpace(0), globalPtrBits(0), + localeIdType(NULL), nodeIdType(NULL), gTypes(), specialFunctions() { } }; -llvm::ModulePass *createGlobalToWide(GlobalToWideInfo* info, std::string setLayout); +llvm::ModulePass *createGlobalToWide(GlobalToWideInfo* info, + std::string setLayout); llvm::Type* convertTypeGlobalToWide(llvm::Module *module, GlobalToWideInfo* info, llvm::Type* t); diff --git a/compiler/include/llvmUtil.h b/compiler/include/llvmUtil.h index 6da92e23a003..0069a35b7c2d 100644 --- a/compiler/include/llvmUtil.h +++ b/compiler/include/llvmUtil.h @@ -40,7 +40,6 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/DataLayout.h" -#define LLVM_TARGET_DATA llvm::DataLayout #define LLVM_ATTRIBUTE llvm::Attribute static inline bool llvm_fn_param_has_attr(llvm::Function* f, unsigned idx, llvm::Attribute::AttrKind v) { @@ -56,7 +55,6 @@ static inline bool llvm_fn_param_has_attr(llvm::Function* f, unsigned idx, llvm: #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" #include 
"llvm/Attributes.h" -#define LLVM_TARGET_DATA llvm::DataLayout #define LLVM_ATTRIBUTE llvm::Attributes static inline bool llvm_fn_param_has_attr(llvm::Function* f, unsigned idx, llvm::Attributes::AttrVal v) { @@ -64,21 +62,9 @@ static inline bool llvm_fn_param_has_attr(llvm::Function* f, unsigned idx, llvm: return f->getParamAttributes(idx).hasAttribute(v); } -#elif HAVE_LLVM_VER >= 31 +#else -#include "llvm/Module.h" -#include "llvm/Value.h" -#include "llvm/Support/IRBuilder.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Intrinsics.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Attributes.h" -#define LLVM_TARGET_DATA llvm::TargetData -#define LLVM_ATTRIBUTE llvm::Attribute -static inline bool llvm_fn_param_has_attr(llvm::Function* f, unsigned idx, llvm::AttrConst v) -{ - return f->paramHasAttr(idx, v); -} +#error LLVM version is too old for this version of Chapel #endif @@ -108,12 +94,12 @@ llvm::Constant* codegenSizeofLLVM(llvm::Type* type); llvm::AllocaInst* makeAlloca(llvm::Type* type, const char* name, llvm::Instruction* insertBefore, unsigned n=1, unsigned align=0); llvm::Value* createTempVarLLVM(llvm::IRBuilder<>* builder, llvm::Type* type, const char* name); -llvm::Value *convertValueToType(llvm::IRBuilder<> *builder, LLVM_TARGET_DATA * targetData, llvm::Value *value, llvm::Type *newType, bool isSigned = false, bool force = false); +llvm::Value *convertValueToType(llvm::IRBuilder<> *builder, const llvm::DataLayout& targetData, llvm::Value *value, llvm::Type *newType, bool isSigned = false, bool force = false); PromotedPair convertValuesToLarger(llvm::IRBuilder<> *builder, llvm::Value *value1, llvm::Value *value2, bool isSigned1 = false, bool isSigned2 = false); -int64_t getTypeSizeInBytes(LLVM_TARGET_DATA * layout, llvm::Type* ty); -bool isTypeSizeSmallerThan(LLVM_TARGET_DATA * layout, llvm::Type* ty, uint64_t max_size_bytes); -uint64_t getTypeFieldNext(LLVM_TARGET_DATA * layout, llvm::Type* ty, uint64_t offset); +int64_t 
getTypeSizeInBytes(const llvm::DataLayout& layout, llvm::Type* ty); +bool isTypeSizeSmallerThan(const llvm::DataLayout& layout, llvm::Type* ty, uint64_t max_size_bytes); +uint64_t getTypeFieldNext(const llvm::DataLayout& layout, llvm::Type* ty, uint64_t offset); // And create a type for a metadata operand diff --git a/compiler/include/symbol.h b/compiler/include/symbol.h index 9e8c26dbfdbe..4f65e337a4f8 100644 --- a/compiler/include/symbol.h +++ b/compiler/include/symbol.h @@ -809,6 +809,7 @@ typedef enum { NONE, BASIC, FULL, + EVERY, // after every optimization if possible // These options allow instrumenting the pass pipeline // and match ExtensionPointTy in PassManagerBuilder EarlyAsPossible, diff --git a/compiler/include/type.h b/compiler/include/type.h index b53b98ce6463..b1171a378866 100644 --- a/compiler/include/type.h +++ b/compiler/include/type.h @@ -74,11 +74,12 @@ class Type : public BaseAST { virtual int codegenStructure(FILE* outfile, const char* baseoffset); - virtual Symbol* getField(const char* name, bool fatal = true); + virtual Symbol* getField(const char* name, bool fatal = true) const; void addSymbol(TypeSymbol* newSymbol); bool isDefaultIntentConst() const; + bool isWidePtrType() const; // get/set on the type destructor bool hasDestructor() const; diff --git a/compiler/main/driver.cpp b/compiler/main/driver.cpp index 06675860620d..9b8a706cf0ff 100644 --- a/compiler/main/driver.cpp +++ b/compiler/main/driver.cpp @@ -80,13 +80,10 @@ const char* CHPL_NETWORK_ATOMICS = NULL; const char* CHPL_GMP = NULL; const char* CHPL_HWLOC = NULL; const char* CHPL_REGEXP = NULL; -const char* CHPL_WIDE_POINTERS = NULL; const char* CHPL_LLVM = NULL; const char* CHPL_AUX_FILESYS = NULL; const char* CHPL_UNWIND = NULL; -bool widePointersStruct; - static char libraryFilename[FILENAME_MAX] = ""; static char incFilename[FILENAME_MAX] = ""; static char moduleSearchPath[FILENAME_MAX] = ""; @@ -903,7 +900,6 @@ static ArgumentDescription arg_desc[] = { {"target-platform", 
' ', "", "Platform for cross-compilation", "S", NULL, "_CHPL_TARGET_PLATFORM", setEnv}, {"tasks", ' ', "", "Specify tasking implementation", "S", NULL, "_CHPL_TASKS", setEnv}, {"timers", ' ', "", "Specify timer implementation", "S", NULL, "_CHPL_TIMERS", setEnv}, - {"wide-pointers", ' ', "", "Specify wide pointer format", "S", NULL, "_CHPL_WIDE_POINTERS", setEnv}, {"", ' ', NULL, "Compiler Information Options", NULL, NULL, NULL, NULL}, DRIVER_ARG_COPYRIGHT, @@ -1144,7 +1140,6 @@ static void setChapelEnvs() { CHPL_GMP = envMap["CHPL_GMP"]; CHPL_HWLOC = envMap["CHPL_HWLOC"]; CHPL_REGEXP = envMap["CHPL_REGEXP"]; - CHPL_WIDE_POINTERS = envMap["CHPL_WIDE_POINTERS"]; CHPL_LLVM = envMap["CHPL_LLVM"]; CHPL_AUX_FILESYS = envMap["CHPL_AUX_FILESYS"]; CHPL_UNWIND = envMap["CHPL_UNWIND"]; @@ -1205,14 +1200,6 @@ static void setMaxCIndentLen() { if (gotPGI) fMaxCIdentLen = 1020; } -static void setWidePointersStruct() { - if (0 == strcmp(CHPL_WIDE_POINTERS, "struct")) { - widePointersStruct = true; - } else { - widePointersStruct = false; - } -} - static void setPrintCppLineno() { if (developer && !userSetCppLineno) printCppLineno = false; } @@ -1245,8 +1232,6 @@ static void postprocess_args() { setMaxCIndentLen(); - setWidePointersStruct(); - postLocal(); postTaskTracking(); diff --git a/compiler/passes/insertWideReferences.cpp b/compiler/passes/insertWideReferences.cpp index 4a057288fbf8..012278729f25 100644 --- a/compiler/passes/insertWideReferences.cpp +++ b/compiler/passes/insertWideReferences.cpp @@ -1611,6 +1611,7 @@ static void derefWideRefsToWideClasses() SET_LINENO(call); VarSymbol* tmp = newTemp(call->get(1)->getValType()); call->getStmtExpr()->insertBefore(new DefExpr(tmp)); + // Probably added here call->getStmtExpr()->insertBefore(new CallExpr(PRIM_MOVE, tmp, new CallExpr(PRIM_DEREF, call->get(1)->remove()))); call->insertAtHead(tmp); } @@ -2096,6 +2097,7 @@ static void fixAST() { } else if (call->isPrimitive(PRIM_MOVE) || call->isPrimitive(PRIM_ASSIGN)) { // TODO: 
Local checks for references from GET_MEMBER_VALUE + // ? add a case for PRIM_GET_MEMBER if (CallExpr* rhs = toCallExpr(call->get(2))) { if (rhs->isPrimitive(PRIM_ADDR_OF) || rhs->isPrimitive(PRIM_SET_REFERENCE)) { SymExpr* LHS = toSymExpr(call->get(1)); diff --git a/compiler/util/clangUtil.cpp b/compiler/util/clangUtil.cpp index c78de5f9293a..18e1492fdbb3 100644 --- a/compiler/util/clangUtil.cpp +++ b/compiler/util/clangUtil.cpp @@ -31,9 +31,21 @@ #include #ifdef HAVE_LLVM +#include "clang/AST/GlobalDecl.h" +#include "clang/CodeGen/BackendUtil.h" +#include "clang/CodeGen/CodeGenABITypes.h" +#include "clang/CodeGen/ModuleBuilder.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/Job.h" + +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO.h" + #endif #include "astutil.h" @@ -73,41 +85,103 @@ using namespace llvm; #define GLOBAL_PTR_SPACE 100 #define WIDE_PTR_SPACE 101 -#define GLOBAL_PTR_SIZE 64 +#define GLOBAL_PTR_SIZE 128 #define GLOBAL_PTR_ABI_ALIGN 64 #define GLOBAL_PTR_PREF_ALIGN 64 +#define LLVM_MODULE_NAME "root" + #include "llvmGlobalToWide.h" #include "llvmAggregateGlobalOps.h" #include "llvmDumpIR.h" -// TODO - add functionality to clang so that we don't -// have to have what are basically copies of -// ModuleBuilder.cpp -// ( and BackendUtil.cpp but we used PassManagerBuilder::addGlobalExtension) -// -// This one is not normally included by clang clients -// and not normally installed in the include directory. -// -// Q. Could we instead call methods on clang::CodeGenerator subclass of -// ASTConsumer such as HandleTopLevelDecl to achieve what we want? -// We would have a different AST visitor for populating the LVT. 
-// -// It is likely that we can leave the C parser "open" somehow and then -// add statements to it at the end. -// BUT we couldn't call EmitDeferredDecl. -// -// +// These are headers internal to clang. Need to be able to: +// 1. Get the LLVM type for a C typedef (say) -- not needed after LLVM 5 +// 2. Get the GEP offset for a field in a C record by name #include "CodeGenModule.h" #include "CGRecordLayout.h" -#include "CGDebugInfo.h" -#include "clang/CodeGen/BackendUtil.h" +//#include "CGDebugInfo.h" static void setupForGlobalToWide(); +static void adjustLayoutForGlobalToWide(); +static void setupModule(); fileinfo gAllExternCode; fileinfo gChplCompilationConfig; +// forward declare +class CCodeGenConsumer; +class CCodeGenAction; + +// This class stores information about the embedded clang compiler +// instance. +struct ClangInfo { + + bool parseOnly; + + std::string clangCC; + std::string clangCXX; + std::string compileline; + std::vector clangCCArgs; + std::vector clangLDArgs; + std::vector clangOtherArgs; + + clang::CodeGenOptions codegenOptions; + llvm::IntrusiveRefCntPtr diagOptions; + clang::TextDiagnosticPrinter* DiagClient; + llvm::IntrusiveRefCntPtr DiagID; + llvm::IntrusiveRefCntPtr Diags; + + clang::CompilerInstance *Clang; + + // Once we get to code generation.... + clang::ASTContext *Ctx; + + clang::CodeGenerator *cCodeGen; + CCodeGenAction *cCodeGenAction; + + // We stash the layout that Clang would like to use here. + // With fLLVMWideOpt, this will be the layout that we + // pass to the code generator even though we modify the + // version in the module (to add global pointer types) + // before running optimization. 
+ std::string asmTargetLayoutStr; + + ClangInfo( + std::string clangCcIn, + std::string clangCxxIn, + std::string compilelineIn, + std::vector clangCCArgsIn, + std::vector clangLDArgsIn, + std::vector clangOtherArgsIn, + bool parseOnlyIn); +}; + +ClangInfo::ClangInfo( + std::string clangCcIn, + std::string clangCxxIn, + std::string compilelineIn, + std::vector clangCCArgsIn, + std::vector clangLDArgsIn, + std::vector clangOtherArgsIn, + bool parseOnlyIn) + : parseOnly(parseOnlyIn), + clangCC(clangCcIn), + clangCXX(clangCxxIn), + compileline(compilelineIn), + clangCCArgs(clangCCArgsIn), clangLDArgs(clangLDArgsIn), + clangOtherArgs(clangOtherArgsIn), + codegenOptions(), diagOptions(NULL), + DiagClient(NULL), + DiagID(NULL), + Diags(NULL), + Clang(NULL), + Ctx(NULL), + cCodeGen(NULL), cCodeGenAction(NULL), + asmTargetLayoutStr() +{ +} + static VarSymbol *minMaxConstant(int nbits, bool isSigned, bool isMin) { @@ -188,74 +262,12 @@ void addMinMax(ASTContext* Ctx, const char* prefix, clang::CanQualType qt) static void setupClangContext(GenInfo* info, ASTContext* Ctx) { - std::string layout; - - info->Ctx = Ctx; - if( ! info->parseOnly ) { - info->module->setTargetTriple( - info->Ctx->getTargetInfo().getTriple().getTriple()); - - // Also setup some basic TBAA metadata nodes. - llvm::LLVMContext& cx = info->module->getContext(); - // Create the TBAA root node - { - LLVM_METADATA_OPERAND_TYPE* Ops[1]; - Ops[0] = llvm::MDString::get(cx, "Chapel types"); - info->tbaaRootNode = llvm::MDNode::get(cx, Ops); - } - } - -#if HAVE_LLVM_VER >= 39 - info->targetLayout = - info->Ctx->getTargetInfo().getDataLayout().getStringRepresentation(); -#elif HAVE_LLVM_VER >= 38 - info->targetLayout = info->Ctx->getTargetInfo().getDataLayoutString(); -#else - info->targetLayout = info->Ctx->getTargetInfo().getTargetDescription(); -#endif - layout = info->targetLayout; - - if( fLLVMWideOpt && ! 
info->parseOnly ) { - char buf[200]; //needs to store up to 8 32-bit numbers in decimal - - assert(GLOBAL_PTR_SIZE == GLOBAL_PTR_BITS); - - // Add global pointer info to layout. - snprintf(buf, sizeof(buf), "-p%u:%u:%u:%u-p%u:%u:%u:%u", GLOBAL_PTR_SPACE, GLOBAL_PTR_SIZE, GLOBAL_PTR_ABI_ALIGN, GLOBAL_PTR_PREF_ALIGN, WIDE_PTR_SPACE, GLOBAL_PTR_SIZE, GLOBAL_PTR_ABI_ALIGN, GLOBAL_PTR_PREF_ALIGN); - layout += buf; - // Save the global address space we are using in info. - info->globalToWideInfo.globalSpace = GLOBAL_PTR_SPACE; - info->globalToWideInfo.wideSpace = WIDE_PTR_SPACE; - } - // Always set the module layout. This works around an apparent bug in - // clang or LLVM (trivial/deitz/test_array_low.chpl would print out the - // wrong answer because some i64s were stored at the wrong alignment). - if( info->module ) info->module->setDataLayout(layout); - - info->targetData = -#if HAVE_LLVM_VER >= 39 - new LLVM_TARGET_DATA(info->Ctx->getTargetInfo().getDataLayout().getStringRepresentation()); -#elif HAVE_LLVM_VER >= 38 - new LLVM_TARGET_DATA(info->Ctx->getTargetInfo().getDataLayoutString()); -#else - new LLVM_TARGET_DATA(info->Ctx->getTargetInfo().getTargetDescription()); -#endif - if( ! info->parseOnly ) { - info->cgBuilder = new CodeGen::CodeGenModule(*Ctx, -#if HAVE_LLVM_VER >= 37 - info->Clang->getHeaderSearchOpts(), - info->Clang->getPreprocessorOpts(), -#endif - info->codegenOptions, - *info->module, -#if HAVE_LLVM_VER <= 37 - *info->targetData, -#endif - *info->Diags); - } + ClangInfo* clangInfo = info->clangInfo; + INT_ASSERT(clangInfo); + clangInfo->Ctx = Ctx; - // Set up some constants that depend on the Clang context. + // Set up some constants that depend on the Clang context. 
{ addMinMax(Ctx, "CHAR", Ctx->CharTy); addMinMax(Ctx, "SCHAR", Ctx->SignedCharTy); @@ -277,7 +289,11 @@ static void handleMacro(const IdentifierInfo* id, const MacroInfo* macro) { GenInfo* info = gGenInfo; - Preprocessor &preproc = info->Clang->getPreprocessor(); + INT_ASSERT(info); + ClangInfo* clangInfo = info->clangInfo; + INT_ASSERT(clangInfo); + + Preprocessor &preproc = clangInfo->Clang->getPreprocessor(); VarSymbol* varRet = NULL; TypeDecl* cTypeRet = NULL; ValueDecl* cValueRet = NULL; @@ -479,6 +495,9 @@ void handleMacro(const IdentifierInfo* id, const MacroInfo* macro) static void readMacrosClang(void) { GenInfo* info = gGenInfo; + INT_ASSERT(info); + ClangInfo* clangInfo = info->clangInfo; + INT_ASSERT(clangInfo); LayeredValueTable *lvt = info->lvt; SET_LINENO(rootModule); @@ -500,7 +519,7 @@ void readMacrosClang(void) { addMinMax("UINT64", 64, false); //printf("Running ReadMacrosAction\n"); - Preprocessor &preproc = info->Clang->getPreprocessor(); + Preprocessor &preproc = clangInfo->Clang->getPreprocessor(); // Identify macro-functions and macro-values. // Later, if we see a use of a macro-function, we can // compile it to a static/inline function with args types based an use @@ -526,104 +545,77 @@ void readMacrosClang(void) { } }; -// We need a way to: -// 1: parse code only -// 2: keep the code generator open until we finish generating Chapel code, +// This ASTConsumer helps us to: +// 1: parse code only in certain configurations +// 2: Convert C code to LLVM IR in others +// 3: keep the code generator open until we finish generating Chapel code, // since we might need to code generate called functions. -// 3: append to the target description // 4: get LLVM values for code generated C things (e.g. 
types, function ptrs) -// -// This code is boiler-plate code mostly copied from ModuleBuilder.cpp - see -// http://clang.llvm.org/doxygen/ModuleBuilder_8cpp_source.html -// Note that ModuleBuilder.cpp is from the clang project and distributed -// under a BSD-like license. -// -// As far as we know, there is no public API for clang that -// would allow us the level of control we need over code generation. -// The portions that are not copied are delineated by -// comments indicating that they are custom to Chapel. class CCodeGenConsumer : public ASTConsumer { private: GenInfo* info; - unsigned HandlingTopLevelDecls; - SmallVector DeferredInlineMethodDefinitions; + clang::DiagnosticsEngine* Diags; + clang::CodeGenerator* Builder; + bool parseOnly; + ASTContext* savedCtx; - struct HandlingTopLevelDeclRAII { - CCodeGenConsumer &Self; - HandlingTopLevelDeclRAII(CCodeGenConsumer &Self) : Self(Self) { - ++Self.HandlingTopLevelDecls; - } - ~HandlingTopLevelDeclRAII() { - if (--Self.HandlingTopLevelDecls == 0) - Self.EmitDeferredDecls(); - } - }; public: CCodeGenConsumer() - : ASTConsumer(), info(gGenInfo), HandlingTopLevelDecls(0) { - } + : ASTConsumer(), + info(gGenInfo), + Diags(info->clangInfo->Diags.get()), + Builder(NULL), + parseOnly(info->clangInfo->parseOnly), + savedCtx(NULL) + { - virtual ~CCodeGenConsumer() { } + if (!parseOnly) { + Builder = CreateLLVMCodeGen( + *Diags, + LLVM_MODULE_NAME, + info->clangInfo->Clang->getHeaderSearchOpts(), + info->clangInfo->Clang->getPreprocessorOpts(), + info->clangInfo->codegenOptions, + info->llvmContext); + + INT_ASSERT(Builder); + INT_ASSERT(!info->module); + info->module = Builder->GetModule(); + info->clangInfo->cCodeGen = Builder; + + // compute target triple, data layout + setupModule(); + } + } - // these macros help us to copy and paste the code from ModuleBuilder. 
-#define Ctx (info->Ctx) -#define Diags (* info->Diags) -#define Builder (info->cgBuilder) -#define CodeGenOpts (info->codegenOptions) + ~CCodeGenConsumer() { } - // mostly taken from ModuleBuilder.cpp + // Start ASTVisitor Overrides + void Initialize(ASTContext &Context) LLVM_CXX_OVERRIDE { - /// ASTConsumer override: - // Initialize - This is called to initialize the consumer, providing - // the ASTContext. - virtual void Initialize(ASTContext &Context) LLVM_CXX_OVERRIDE { - // This does setTargetTriple, setDataLayout, initialize targetData - // and cgBuilder. setupClangContext(info, &Context); -#if HAVE_LLVM_VER <= 38 - for (size_t i = 0, e = CodeGenOpts.DependentLibraries.size(); i < e; ++i) - HandleDependentLibrary(CodeGenOpts.DependentLibraries[i]); -#else - for (auto &&Lib : CodeGenOpts.DependentLibraries) - Builder->AddDependentLib(Lib); - for (auto &&Opt : CodeGenOpts.LinkerOptions) - Builder->AppendLinkerOptions(Opt); -#endif - } - - // ASTConsumer override: - // HandleCXXStaticMemberVarInstantiation - Tell the consumer that - // this variable has been instantiated. - virtual void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) LLVM_CXX_OVERRIDE { - // Custom to Chapel - if( info->parseOnly ) return; - // End custom to Chapel + if (parseOnly) return; - if (Diags.hasErrorOccurred()) - return; + // Call Initialize on the code generator + // Note: this can call setDataLayout on the module! + Builder->Initialize(Context); - Builder->HandleCXXStaticMemberVarInstantiation(VD); + // Adjust the data layout again since it might have been overwritten. + adjustLayoutForGlobalToWide(); } - // ASTConsumer override: - // // HandleTopLevelDecl - Handle the specified top-level declaration. // This is called by the parser to process every top-level Decl*. // // \returns true to continue parsing, or false to abort parsing. 
- virtual bool HandleTopLevelDecl(DeclGroupRef DG) LLVM_CXX_OVERRIDE { - if (Diags.hasErrorOccurred()) - return true; + bool HandleTopLevelDecl(DeclGroupRef DG) LLVM_CXX_OVERRIDE { - HandlingTopLevelDeclRAII HandlingDecl(*this); + if (Diags->hasErrorOccurred()) return true; - // Make sure to emit all elements of a Decl. for (DeclGroupRef::iterator I = DG.begin(), E = DG.end(); I != E; ++I) { - // Custom to Chapel if(TypedefDecl *td = dyn_cast(*I)) { const clang::Type *ctype= td->getUnderlyingType().getTypePtrOrNull(); - //printf("Adding typedef %s\n", td->getNameAsString().c_str()); if(ctype != NULL) { info->lvt->addGlobalCDecl(td); } @@ -637,114 +629,48 @@ class CCodeGenConsumer : public ASTConsumer { info->lvt->addGlobalCDecl(rd); } } - if( info->parseOnly ) continue; - // End custom to Chapel - - Builder->EmitTopLevelDecl(*I); } - return true; - } + if (parseOnly) return true; - // ModuleBuilder.cpp has EmitDeferredDecls but that's not in ASTConsumer. - void EmitDeferredDecls() { - if (DeferredInlineMethodDefinitions.empty()) - return; - - // Emit any deferred inline method definitions. Note that more deferred - // methods may be added during this loop, since ASTConsumer callbacks - // can be invoked if AST inspection results in declarations being added. - HandlingTopLevelDeclRAII HandlingDecl(*this); - for (unsigned I = 0; I != DeferredInlineMethodDefinitions.size(); ++I) - Builder->EmitTopLevelDecl(DeferredInlineMethodDefinitions[I]); - DeferredInlineMethodDefinitions.clear(); + return Builder->HandleTopLevelDecl(DG); } #if HAVE_LLVM_VER >= 39 - // ASTConsumer override: // \brief This callback is invoked each time an inline (method or friend) // function definition in a class is completed. void HandleInlineFunctionDefinition(FunctionDecl *D) override { - if (Diags.hasErrorOccurred()) - return; - - assert(D->doesThisDeclarationHaveABody()); - - // Handle friend functions. 
- if (D->isInIdentifierNamespace(Decl::IDNS_OrdinaryFriend)) { - if (Ctx->getTargetInfo().getCXXABI().isMicrosoft() - && !D->getLexicalDeclContext()->isDependentContext()) - Builder->EmitTopLevelDecl(D); - return; - } - - // Otherwise, must be a method. - auto MD = cast(D); - - // We may want to emit this definition. However, that decision might be - // based on computing the linkage, and we have to defer that in case we - // are inside of something that will change the method's final linkage, - // e.g. - // typedef struct { - // void bar(); - // void foo() { bar(); } - // } A; - DeferredInlineMethodDefinitions.push_back(MD); - - // Provide some coverage mapping even for methods that aren't emitted. - // Don't do this for templated classes though, as they may not be - // instantiable. - if (!MD->getParent()->getDescribedClassTemplate()) - Builder->AddDeferredUnusedCoverageMapping(MD); + if (Diags->hasErrorOccurred()) return; + if (parseOnly) return; + Builder->HandleInlineFunctionDefinition(D); } #else - // ASTConsumer override: // \brief This callback is invoked each time an inline method // definition is completed. - virtual void HandleInlineMethodDefinition(CXXMethodDecl *D) LLVM_CXX_OVERRIDE { - if (Diags.hasErrorOccurred()) - return; - - assert(D->doesThisDeclarationHaveABody()); - - // We may want to emit this definition. However, that decision might be - // based on computing the linkage, and we have to defer that in case we - // are inside of something that will change the method's final linkage, - // e.g. - // typedef struct { - // void bar(); - // void foo() { bar(); } - // } A; - DeferredInlineMethodDefinitions.push_back(D); - - // Provide some coverage mapping even for methods that aren't emitted. - // Don't do this for templated classes though, as they may not be - // instantiable. 
- if (!D->getParent()->getDescribedClassTemplate()) - Builder->AddDeferredUnusedCoverageMapping(D); + void HandleInlineMethodDefinition(CXXMethodDecl *D) LLVM_CXX_OVERRIDE { + if (Diags->hasErrorOccurred()) return; + if (parseOnly) return; + Builder->HandleInlineMethodDefinition(D); } #endif - // skipped ASTConsumer HandleInterestingDecl - // HandleTagDeclRequiredDefinition - // HandleCXXImplicitFunctionInstantiation - // HandleTopLevelDeclInObjCContainer - // HandleImplicitImportDecl - // GetASTMutationListener - // GetASTDeserializationListener - // PrintStats - // shouldSkipFunctionBody - - // ASTConsumer override: - // HandleTagDeclDefinition - This callback is invoked each time a TagDecl - // to (e.g. struct, union, enum, class) is completed. This allows the - // client hack on the type, which can occur at any point in the file - // (because these can be defined in declspecs). - virtual void HandleTagDeclDefinition(TagDecl *D) LLVM_CXX_OVERRIDE { - if (Diags.hasErrorOccurred()) - return; - - // Custom to Chapel - make a note of C globals + void HandleInterestingDecl(DeclGroupRef D) LLVM_CXX_OVERRIDE { + if (Diags->hasErrorOccurred()) return; + if (parseOnly) return; + Builder->HandleInterestingDecl(D); + } + + void HandleTranslationUnit(ASTContext &Context) LLVM_CXX_OVERRIDE { + // Don't call Builder->HandleTranslationUnit yet, so that we + // can keep it open to codegen more later. 
+ savedCtx = &Context; + INT_ASSERT(savedCtx == info->clangInfo->Ctx); + } + + void HandleTagDeclDefinition(TagDecl *D) LLVM_CXX_OVERRIDE { + if (Diags->hasErrorOccurred()) return; + + // make a note of C globals if(EnumDecl *ed = dyn_cast(D)) { // Add the enum type info->lvt->addGlobalCDecl(ed); @@ -761,211 +687,147 @@ class CCodeGenConsumer : public ASTConsumer { info->lvt->addGlobalCDecl(rd); } } - if( info->parseOnly ) return; - // End Custom to Chapel - - Builder->UpdateCompletedType(D); - - // For MSVC compatibility, treat declarations of static data members with - // inline initializers as definitions. - if (Ctx->getLangOpts().MSVCCompat) { - for (Decl *Member : D->decls()) { - if (VarDecl *VD = dyn_cast(Member)) { - if (Ctx->isMSStaticDataMemberInlineDefinition(VD) && - Ctx->DeclMustBeEmitted(VD)) { - Builder->EmitGlobal(VD); - } - } - } - } + if (parseOnly) return; + Builder->HandleTagDeclDefinition(D); } - // ASTConsumer override: - // \brief This callback is invoked the first time each TagDecl is required - // to be complete. 
- virtual void HandleTagDeclRequiredDefinition(const TagDecl *D) LLVM_CXX_OVERRIDE { - if (Diags.hasErrorOccurred()) - return; - - if( info->parseOnly ) return; + void HandleTagDeclRequiredDefinition(const TagDecl *D) LLVM_CXX_OVERRIDE { + if (Diags->hasErrorOccurred()) return; + if (parseOnly) return; + Builder->HandleTagDeclRequiredDefinition(D); + } - if (CodeGen::CGDebugInfo *DI = Builder->getModuleDebugInfo()) - if (const RecordDecl *RD = dyn_cast(D)) - DI->completeRequiredType(RD); + void HandleCXXImplicitFunctionInstantiation(FunctionDecl *D) LLVM_CXX_OVERRIDE { + if (Diags->hasErrorOccurred()) return; + if (parseOnly) return; + Builder->HandleCXXImplicitFunctionInstantiation(D); } + void HandleTopLevelDeclInObjCContainer(DeclGroupRef D) LLVM_CXX_OVERRIDE { + if (Diags->hasErrorOccurred()) return; + if (parseOnly) return; + Builder->HandleTopLevelDeclInObjCContainer(D); + } - // ASTConsumer override: - // HandleTranslationUnit - This method is called when the ASTs for - // entire translation unit have been parsed. - virtual void HandleTranslationUnit(ASTContext &Context) LLVM_CXX_OVERRIDE { - if (Diags.hasErrorOccurred()) { - if(Builder) - Builder->clear(); - return; - } - - /* custom to Chapel - - we don't release the builder now, because - we want to add a bunch of uses of functions - that may not have been codegened yet. - - Instead, we call this in cleanupClang. - if (Builder) - Builder->Release(); - */ - } + void HandleImplicitImportDecl(ImportDecl *D) LLVM_CXX_OVERRIDE { + if (Diags->hasErrorOccurred()) return; + if (parseOnly) return; + Builder->HandleImplicitImportDecl(D); + } - // ASTConsumer override: - // - // CompleteTentativeDefinition - Callback invoked at the end of a - // translation unit to notify the consumer that the given tentative - // definition should be completed. - // - // The variable declaration - // itself will be a tentative definition. 
If it had an incomplete - // array type, its type will have already been changed to an array - // of size 1. However, the declaration remains a tentative - // definition and has not been modified by the introduction of an - // implicit zero initializer. - virtual void CompleteTentativeDefinition(VarDecl *D) LLVM_CXX_OVERRIDE { - if (Diags.hasErrorOccurred()) - return; - - // Custom to Chapel - if( info->parseOnly ) return; - // End Custom to Chapel - - Builder->EmitTentativeDefinition(D); - } + void CompleteTentativeDefinition(VarDecl *D) LLVM_CXX_OVERRIDE { + if (Diags->hasErrorOccurred()) return; + if (parseOnly) return; + Builder->CompleteTentativeDefinition(D); + } - // ASTConsumer override: - // \brief Callback involved at the end of a translation unit to - // notify the consumer that a vtable for the given C++ class is - // required. - // - // \param RD The class whose vtable was used. - virtual void HandleVTable(CXXRecordDecl *RD -#if HAVE_LLVM_VER < 37 - , bool DefinitionRequired -#endif - ) LLVM_CXX_OVERRIDE { - if (Diags.hasErrorOccurred()) - return; + void AssignInheritanceModel(CXXRecordDecl *RD) LLVM_CXX_OVERRIDE { + if (Diags->hasErrorOccurred()) return; + if (parseOnly) return; + Builder->AssignInheritanceModel(RD); + } - // Custom to Chapel - if( info->parseOnly ) return; - // End Custom to Chapel + void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) LLVM_CXX_OVERRIDE { + if (Diags->hasErrorOccurred()) return; + if (parseOnly) return; + Builder->HandleCXXStaticMemberVarInstantiation(VD); + } - Builder->EmitVTable(RD -#if HAVE_LLVM_VER < 37 - , DefinitionRequired -#endif - ); + void HandleVTable(CXXRecordDecl *RD) LLVM_CXX_OVERRIDE { + if (Diags->hasErrorOccurred()) return; + if (parseOnly) return; + Builder->HandleVTable(RD); } -#if HAVE_LLVM_VER <= 38 - // ASTConsumer override: - // - // \brief Handle a pragma that appends to Linker Options. Currently - // this only exists to support Microsoft's #pragma comment(linker, - // "/foo"). 
- virtual void HandleLinkerOptionPragma(llvm::StringRef Opts) override { - Builder->AppendLinkerOptions(Opts); - } + ASTMutationListener *GetASTMutationListener() LLVM_CXX_OVERRIDE { + if (Builder) return Builder->GetASTMutationListener(); + return nullptr; + } - // HandleLinkerOptionPragma - // ASTConsumer override: - // \brief Handle a pragma that emits a mismatch identifier and value to - // the object file for the linker to work with. Currently, this only - // exists to support Microsoft's #pragma detect_mismatch. - virtual void HandleDetectMismatch(llvm::StringRef Name, - llvm::StringRef Value) LLVM_CXX_OVERRIDE { - Builder->AddDetectMismatch(Name, Value); - } + ASTDeserializationListener *GetASTDeserializationListener() LLVM_CXX_OVERRIDE { + if (Builder) return Builder->GetASTDeserializationListener(); + return nullptr; + } - // ASTConsumer override: - // \brief Handle a dependent library created by a pragma in the source. - /// Currently this only exists to support Microsoft's - /// #pragma comment(lib, "/foo"). 
- virtual void HandleDependentLibrary(llvm::StringRef Lib) LLVM_CXX_OVERRIDE { - Builder->AddDependentLib(Lib); - } -#endif + void PrintStats() LLVM_CXX_OVERRIDE { + if (Builder) Builder->PrintStats(); + } - // undefine macros we created to help with ModuleBuilder -#undef Ctx -#undef Diags -#undef Builder -#undef CodeGenOpts + bool shouldSkipFunctionBody(Decl *D) LLVM_CXX_OVERRIDE { + if (Builder) return Builder->shouldSkipFunctionBody(D); + return true; + } -}; + // End ASTVisitor overrides + + const Decl *GetDeclForMangledName(llvm::StringRef MangledName) { + return Builder->GetDeclForMangledName(MangledName); + } + llvm::Constant *GetAddrOfGlobal(GlobalDecl decl) { + return Builder->GetAddrOfGlobal(decl, false); + } +}; -#if HAVE_LLVM_VER >= 36 -#define CREATE_AST_CONSUMER_RETURN_TYPE std::unique_ptr -#else -#define CREATE_AST_CONSUMER_RETURN_TYPE ASTConsumer* -#endif class CCodeGenAction : public ASTFrontendAction { public: CCodeGenAction() { } protected: - virtual CREATE_AST_CONSUMER_RETURN_TYPE CreateASTConsumer( - CompilerInstance &CI, StringRef InFile); + std::unique_ptr + CreateASTConsumer(CompilerInstance &CI, StringRef InFile); }; -CREATE_AST_CONSUMER_RETURN_TYPE CCodeGenAction::CreateASTConsumer( - CompilerInstance &CI, StringRef InFile) { -#if HAVE_LLVM_VER >= 36 - return std::unique_ptr(new CCodeGenConsumer()); -#else - return new CCodeGenConsumer(); -#endif +std::unique_ptr +CCodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { + CCodeGenConsumer* c = new CCodeGenConsumer(); + return std::unique_ptr(c); }; -static void finishClang(GenInfo* info){ - if( info->cgBuilder ) { - info->cgBuilder->Release(); +static void finishClang(ClangInfo* clangInfo){ + if( clangInfo->cCodeGen ) { + // This should call Builder->Release() + clangInfo->cCodeGen->HandleTranslationUnit(*clangInfo->Ctx); } - info->Diags.reset(); - info->DiagID.reset(); + clangInfo->Diags.reset(); + clangInfo->DiagID.reset(); } -static void deleteClang(GenInfo* info){ - 
if( info->cgBuilder ) { - delete info->cgBuilder; - info->cgBuilder = NULL; +static void deleteClang(ClangInfo* clangInfo){ + if( clangInfo->cCodeGen ) { + delete clangInfo->cCodeGen; + clangInfo->cCodeGen = NULL; } - delete info->targetData; - delete info->Clang; - info->Clang = NULL; - delete info->cgAction; - info->cgAction = NULL; + delete clangInfo->Clang; + clangInfo->Clang = NULL; + delete clangInfo->cCodeGenAction; + clangInfo->cCodeGenAction = NULL; } -static void cleanupClang(GenInfo* info) +static void cleanupClang(ClangInfo* clangInfo) { - finishClang(info); - deleteClang(info); + finishClang(clangInfo); + deleteClang(clangInfo); } void setupClang(GenInfo* info, std::string mainFile) { - std::string clangexe = info->clangCC; + ClangInfo* clangInfo = info->clangInfo; + INT_ASSERT(clangInfo); + + std::string clangexe = clangInfo->clangCC; std::vector clangArgs; clangArgs.push_back(""); - for( size_t i = 0; i < info->clangCCArgs.size(); ++i ) { - clangArgs.push_back(info->clangCCArgs[i].c_str()); + for( size_t i = 0; i < clangInfo->clangCCArgs.size(); ++i ) { + clangArgs.push_back(clangInfo->clangCCArgs[i].c_str()); } - for( size_t i = 0; i < info->clangLDArgs.size(); ++i ) { - clangArgs.push_back(info->clangLDArgs[i].c_str()); + for( size_t i = 0; i < clangInfo->clangLDArgs.size(); ++i ) { + clangArgs.push_back(clangInfo->clangLDArgs[i].c_str()); } - for( size_t i = 0; i < info->clangOtherArgs.size(); ++i ) { - clangArgs.push_back(info->clangOtherArgs[i].c_str()); + for( size_t i = 0; i < clangInfo->clangOtherArgs.size(); ++i ) { + clangArgs.push_back(clangInfo->clangOtherArgs[i].c_str()); } clangArgs.push_back("-c"); @@ -995,18 +857,18 @@ void setupClang(GenInfo* info, std::string mainFile) CompilerInstance* Clang = new CompilerInstance(); Clang->createDiagnostics(); - info->diagOptions = new DiagnosticOptions(); - info->DiagClient= new TextDiagnosticPrinter(errs(),&*info->diagOptions); - info->DiagID = new DiagnosticIDs(); + clangInfo->diagOptions = new 
DiagnosticOptions(); + clangInfo->DiagClient= new TextDiagnosticPrinter(errs(),&*clangInfo->diagOptions); + clangInfo->DiagID = new DiagnosticIDs(); DiagnosticsEngine* Diags = NULL; #if HAVE_LLVM_VER >= 32 Diags = new DiagnosticsEngine( - info->DiagID, &*info->diagOptions, info->DiagClient); + clangInfo->DiagID, &*clangInfo->diagOptions, clangInfo->DiagClient); #else - Diags = new DiagnosticsEngine(info->DiagID, info->DiagClient); + Diags = new DiagnosticsEngine(clangInfo->DiagID, clangInfo->DiagClient); #endif - info->Diags = Diags; - info->Clang = Clang; + clangInfo->Diags = Diags; + clangInfo->Clang = Clang; clang::driver::Driver TheDriver(clangexe, llvm::sys::getDefaultTargetTriple(), *Diags); @@ -1029,7 +891,7 @@ void setupClang(GenInfo* info, std::string mainFile) // TheDriver.BuildCompilation // get a Compilation? //CompilerInvocation* CI = - // createInvocationFromCommandLine(clangArgs, info->Diags); + // createInvocationFromCommandLine(clangArgs, clangInfo->Diags); bool success = CompilerInvocation::CreateFromArgs( Clang->getInvocation(), // &clangArgs.front(), &clangArgs.back(), @@ -1040,11 +902,11 @@ void setupClang(GenInfo* info, std::string mainFile) INT_ASSERT(success); // Get the codegen options from the clang command line. - info->codegenOptions = CI->getCodeGenOpts(); + clangInfo->codegenOptions = CI->getCodeGenOpts(); // if --fast is given, we should be at least at -O3. - if(fFastFlag && info->codegenOptions.OptimizationLevel < 3) { - info->codegenOptions.OptimizationLevel = 3; + if(fFastFlag && clangInfo->codegenOptions.OptimizationLevel < 3) { + clangInfo->codegenOptions.OptimizationLevel = 3; } { @@ -1091,32 +953,166 @@ void setupClang(GenInfo* info, std::string mainFile) #endif } - // Save the TargetOptions and LangOptions since these - // are used during machine code generation. 
- info->clangTargetOptions = info->Clang->getTargetOpts(); - - // For debugging, it might be useful to check that - // the target architecture has the right features - // (it has been detected correctly). - /*std::vector x = info->clangTargetOptions.FeaturesAsWritten; - printf("target features\n"); - for (auto i : x) { - printf("%s\n", i.c_str()); - }*/ - - info->clangLangOptions = info->Clang->getLangOpts(); - - // Create the compilers actual diagnostics engine. // Create the compilers actual diagnostics engine. #if HAVE_LLVM_VER >= 33 - info->Clang->createDiagnostics(); + clangInfo->Clang->createDiagnostics(); #else - info->Clang->createDiagnostics(int(clangArgs.size()),&clangArgs[0]); + clangInfo->Clang->createDiagnostics(int(clangArgs.size()),&clangArgs[0]); #endif - if (!info->Clang->hasDiagnostics()) + if (!clangInfo->Clang->hasDiagnostics()) INT_FATAL("Bad diagnostics from clang"); + + // Set llvm options + { + std::vector vec; + + // Start with any -mllvm options from the Clang invocation + auto clangMLLVM = CI->getFrontendOpts().LLVMArgs; + for (auto & arg : clangMLLVM) { + vec.push_back(arg); + } + + // Then add any from --mllvm passed to Chapel + if (llvmFlags != "") { + //split llvmFlags by spaces + std::stringstream argsStream(llvmFlags); + std::string arg; + while(argsStream >> arg) + vec.push_back(arg); + } + + std::vector Args; + Args.push_back("chpl-llvm-opts"); + for (auto & i : vec) { + Args.push_back(i.c_str()); + } + Args.push_back(NULL); + + if (printSystemCommands) { + printf("# parsing llvm command line options: "); + for (auto arg : Args) { + if (arg != NULL) + printf(" %s", arg); + } + printf("\n"); + } + + llvm::cl::ParseCommandLineOptions(Args.size()-1, &Args[0]); + } } +static void setupModule() +{ + GenInfo* info = gGenInfo; + INT_ASSERT(info); + ClangInfo* clangInfo = info->clangInfo; + INT_ASSERT(clangInfo); + + if (clangInfo->parseOnly) return; + + INT_ASSERT(info->module); + +#if HAVE_LLVM_VER >= 39 + clangInfo->asmTargetLayoutStr 
= + clangInfo->Clang->getTarget().getDataLayout().getStringRepresentation(); +#elif HAVE_LLVM_VER >= 38 + clangInfo->asmTargetLayoutStr = clangInfo->Clang->getTarget().getDataLayoutString(); +#else + clangInfo->asmTargetLayoutStr = clangInfo->Clang->getTarget().getTargetDescription(); +#endif + + // Set the target triple. + const llvm::Triple &Triple = + clangInfo->Clang->getTarget().getTriple(); + info->module->setTargetTriple(Triple.getTriple()); + + // Always set the module layout. This works around an apparent bug in + // clang or LLVM (trivial/deitz/test_array_low.chpl would print out the + // wrong answer because some i64s were stored at the wrong alignment). + info->module->setDataLayout(clangInfo->asmTargetLayoutStr); + + adjustLayoutForGlobalToWide(); + + // Set the TargetMachine + std::string Err; + const llvm::Target* Target = TargetRegistry::lookupTarget(Triple.str(), Err); + if (!Target) + USR_FATAL("Could not find LLVM target for %s: %s", + Triple.str().c_str(), Err.c_str()); + + + const clang::TargetOptions & ClangOpts = clangInfo->Clang->getTargetOpts(); + + std::string cpu = ClangOpts.CPU; + std::vector clangFeatures = ClangOpts.Features; + std::string featuresString; + if (!clangFeatures.empty()) { + llvm::SubtargetFeatures features; + for (const std::string &feature : clangFeatures) + features.AddFeature(feature); + featuresString = features.getString(); + } + + if (printSystemCommands) { + printf("# target features %s\n", featuresString.c_str()); + } + + // Set up the TargetOptions + llvm::TargetOptions targetOptions; + targetOptions.ThreadModel = llvm::ThreadModel::POSIX; + + if (ffloatOpt) { + // see also FastMathFlags FM.setUnsafeAlgebra etc + targetOptions.UnsafeFPMath = 1; + targetOptions.AllowFPOpFusion = llvm::FPOpFusion::Fast; + targetOptions.NoNaNsFPMath = 1; + targetOptions.NoInfsFPMath = 1; + //targetOptions.NoSignedZerosFPMath = 1; + // we could also consider: + // NoTrappingFPMath, HonorSignDependentRoundingFPMathOption + } + + if 
(!fFastFlag) + targetOptions.EnableFastISel = 1; + else { + // things to consider: + // EnableIPRA -- InterProcedural Register Allocation (IPRA). + // GuaranteedTailCallOpt -- guarantee tail call opt (may change fn ABI) + } + + llvm::Reloc::Model relocModel = llvm::Reloc::Model::Static; + // TODO: we may need to use Reloc::PIC_ once we start + // interpreting, etc. + + // Choose the code model + llvm::CodeModel::Model codeModel = llvm::CodeModel::Default; + + llvm::CodeGenOpt::Level optLevel = + fFastFlag ? llvm::CodeGenOpt::Aggressive : llvm::CodeGenOpt::None; + + // Create the target machine. + info->targetMachine = Target->createTargetMachine(Triple.str(), + cpu, + featuresString, + targetOptions, + relocModel, + codeModel, + optLevel); + + + + // TODO: set a module flag with the Chapel ABI version + // m->addModuleFlag(llvm::Module::Error, "Chapel Version", unsigned); + + // Also setup some basic TBAA metadata nodes. + llvm::LLVMContext& cx = info->module->getContext(); + // Create the TBAA root node + { + LLVM_METADATA_OPERAND_TYPE* Ops[1]; + Ops[0] = llvm::MDString::get(cx, "Chapel types"); + info->tbaaRootNode = llvm::MDNode::get(cx, Ops); + } +} void finishCodegenLLVM() { GenInfo* info = gGenInfo; @@ -1132,7 +1128,7 @@ void finishCodegenLLVM() { info->FPM_postgen = NULL; // Now finish any Clang code generation. 
- finishClang(info); + finishClang(info->clangInfo); if(debug_info)debug_info->finalize(); @@ -1152,44 +1148,63 @@ void finishCodegenLLVM() { } static -void configurePMBuilder(PassManagerBuilder &PMBuilder) { +void configurePMBuilder(PassManagerBuilder &PMBuilder, int optLevel=-1) { + ClangInfo* clangInfo = gGenInfo->clangInfo; + INT_ASSERT(clangInfo); + clang::CodeGenOptions &opts = clangInfo->codegenOptions; + + if (optLevel < 0) + optLevel = opts.OptimizationLevel; + if( fFastFlag ) { - PMBuilder.OptLevel = 3; - PMBuilder.LoopVectorize = true; - PMBuilder.SLPVectorize = true; + // TODO -- remove this assert + INT_ASSERT(opts.OptimizationLevel >= 2); + } + + if (optLevel >= 1) + PMBuilder.Inliner = createFunctionInliningPass(optLevel, + opts.OptimizeSize +#if HAVE_LLVM_VER >= 50 + , /*DisableInlineHotCalsite*/ + false +#endif + ); + + PMBuilder.OptLevel = optLevel; + PMBuilder.SizeLevel = opts.OptimizeSize; #if HAVE_LLVM_VER < 50 - PMBuilder.BBVectorize = true; + PMBuilder.BBVectorize = opts.VectorizeBB; #endif - PMBuilder.DisableUnrollLoops = true; - // TODO: what other flags on PMBuilder should we set? - } else { - PMBuilder.OptLevel = 0; - } + PMBuilder.SLPVectorize = opts.VectorizeSLP; + PMBuilder.LoopVectorize = opts.VectorizeLoop; + PMBuilder.DisableUnrollLoops = !opts.UnrollLoops; + PMBuilder.MergeFunctions = opts.MergeFunctions; + PMBuilder.PrepareForThinLTO = opts.EmitSummaryIndex; + PMBuilder.PrepareForLTO = opts.PrepareForLTO; + PMBuilder.RerollLoops = opts.RerollLoops; + + // TODO: we might need to call TargetMachine's addEarlyAsPossiblePasses } void prepareCodegenLLVM() { GenInfo *info = gGenInfo; - LEGACY_FUNCTION_PASS_MANAGER *fpm = new LEGACY_FUNCTION_PASS_MANAGER(info->module); + llvm::legacy::FunctionPassManager *fpm = new llvm::legacy::FunctionPassManager(info->module); PassManagerBuilder PMBuilder; // Set up the optimizer pipeline. - // Start with registering info about how the - // target lays out data structures. 
-#if HAVE_LLVM_VER >= 37 - // We already set the data layout in setupClangContext - // don't need to do anything else. -#elif HAVE_LLVM_VER >= 36 - // We already set the data layout in setupClangContext - fpm->add(new DataLayoutPass()); -#elif HAVE_LLVM_VER >= 35 - fpm->add(new DataLayoutPass(info->module)); -#else - fpm->add(new DataLayout(info->module)); -#endif + + // Add the TransformInfo pass + fpm->add(createTargetTransformInfoWrapperPass( + info->targetMachine->getTargetIRAnalysis())); + + // Add the TargetLibraryInfo pass + Triple TargetTriple(info->module->getTargetTriple()); + llvm::TargetLibraryInfoImpl TLII(TargetTriple); + fpm->add(new TargetLibraryInfoWrapperPass(TLII)); configurePMBuilder(PMBuilder); PMBuilder.populateFunctionPassManager(*fpm); @@ -1200,6 +1215,7 @@ void prepareCodegenLLVM() if(ffloatOpt == 1) { + // see also targetOptions.UnsafeFPMath etc llvm::FastMathFlags FM; FM.setNoNaNs(); FM.setNoInfs(); @@ -1208,6 +1224,8 @@ void prepareCodegenLLVM() FM.setUnsafeAlgebra(); info->builder->setFastMathFlags(FM); } + + checkAdjustedDataLayout(); } #if HAVE_LLVM_VER >= 33 @@ -1370,9 +1388,24 @@ void runClang(const char* just_parse_filename) { // Initialize gGenInfo // Toggle LLVM code generation in our clang run; // turn it off if we just wanted to parse some C. - gGenInfo = new GenInfo(clangCC, clangCXX, - compileline, clangCCArgs, clangLDArgs, clangOtherArgs, - just_parse_filename != NULL); + gGenInfo = new GenInfo(); + + bool parseOnly = (just_parse_filename != NULL); + + gGenInfo->lvt = new LayeredValueTable(); + + + ClangInfo* clangInfo = NULL; + clangInfo = new ClangInfo(clangCC, clangCXX, + compileline, + clangCCArgs, clangLDArgs, clangOtherArgs, + parseOnly); + + gGenInfo->clangInfo = clangInfo; + + std::string rtmain = home + "/runtime/etc/rtmain.c"; + + setupClang(gGenInfo, rtmain); if( llvmCodegen || externC ) { @@ -1390,8 +1423,8 @@ void runClang(const char* just_parse_filename) { // and cause them to be emitted eventually. 
// CCodeGenAction is defined above. It traverses the C AST // and does the code generation. - info->cgAction = new CCodeGenAction(); - if (!info->Clang->ExecuteAction(*info->cgAction)) { + clangInfo->cCodeGenAction = new CCodeGenAction(); + if (!clangInfo->Clang->ExecuteAction(*clangInfo->cCodeGenAction)) { if (just_parse_filename) { USR_FATAL("error running clang on extern block"); } else { @@ -1399,7 +1432,13 @@ void runClang(const char* just_parse_filename) { } } - if( ! info->parseOnly ) { + if( ! parseOnly ) { + // LLVM module should have been created by CCodeGenConsumer + INT_ASSERT(gGenInfo->module); + + // Create a new IRBuilder, and LayeredValueTable. + gGenInfo->builder = new llvm::IRBuilder<>(gGenInfo->module->getContext()); + // This seems to be needed, even though it is strange. // (otherwise we segfault in info->builder->CreateGlobalString) @@ -1420,7 +1459,7 @@ void runClang(const char* just_parse_filename) { // so needs to happen after we set the insert point. readMacrosClang(); - if( ! info->parseOnly ) { + if( ! parseOnly ) { info->builder->CreateRetVoid(); } } @@ -1496,7 +1535,8 @@ void cleanupExternC(void) { ++it ) { ModuleSymbol* module = *it; INT_ASSERT(module->extern_info); - cleanupClang(module->extern_info->gen_info); + cleanupClang(module->extern_info->gen_info->clangInfo); + delete module->extern_info->gen_info->clangInfo; delete module->extern_info->gen_info; delete module->extern_info; // Remove all ExternBlockStmts from this module. 
@@ -1537,7 +1577,13 @@ llvm::Type* getTypeLLVM(const char* name) llvm::Type* codegenCType(const TypeDecl* td) { GenInfo* info = gGenInfo; - CodeGen::CodeGenTypes & cdt = info->cgBuilder->getTypes(); + INT_ASSERT(info); + ClangInfo* clangInfo = info->clangInfo; + INT_ASSERT(clangInfo); + clang::CodeGenerator* cCodeGen = clangInfo->cCodeGen; + INT_ASSERT(cCodeGen); + + //CodeGen::CodeGenTypes & cdt = info->cgBuilder->getTypes(); QualType qType; // handle TypedefDecl @@ -1560,13 +1606,23 @@ llvm::Type* codegenCType(const TypeDecl* td) } else { INT_FATAL("Unknown clang type declaration"); } - return cdt.ConvertTypeForMem(qType); +#if HAVE_LLVM_VER >= 50 + return clang::CodeGen::convertTypeForMemory(cCodeGen->CGM(), qType); +#else + return cCodeGen->CGM().getTypes().ConvertTypeForMem(qType); +#endif } // should support FunctionDecl,VarDecl,EnumConstantDecl GenRet codegenCValue(const ValueDecl *vd) { GenInfo* info = gGenInfo; + INT_ASSERT(info); + ClangInfo* clangInfo = info->clangInfo; + INT_ASSERT(clangInfo); + clang::CodeGenerator* cCodeGen = clangInfo->cCodeGen; + INT_ASSERT(cCodeGen); + GenRet ret; if( info->cfile ) { @@ -1576,19 +1632,20 @@ GenRet codegenCValue(const ValueDecl *vd) if(const FunctionDecl *fd = dyn_cast(vd)) { // It's a function decl. - ret.val = info->cgBuilder->GetAddrOfFunction(fd); + ret.val = cCodeGen->GetAddrOfGlobal(GlobalDecl(fd), false); ret.isLVPtr = GEN_VAL; } else if(const VarDecl *vard = dyn_cast(vd)) { // It's a (global) variable decl - ret.val = info->cgBuilder->GetAddrOfGlobalVar(vard); + ret.val = cCodeGen->GetAddrOfGlobal(GlobalDecl(vard), false); ret.isLVPtr = GEN_PTR; } else if(const EnumConstantDecl *ed = dyn_cast(vd)) { // It's a constant enum value APInt v = ed->getInitVal(); ret.isUnsigned = ! 
ed->getType()->hasSignedIntegerRepresentation(); - CodeGen::CodeGenTypes & cdt = info->cgBuilder->getTypes(); - llvm::Type* type = cdt.ConvertTypeForMem(ed->getType()); + + // TODO: should be in clang::CodeGenerator API + llvm::Type* type = cCodeGen->CGM().getTypes().ConvertTypeForMem(ed->getType()); ret.val = ConstantInt::get(type, v); ret.isLVPtr = GEN_VAL; } else { @@ -1714,11 +1771,18 @@ GenRet LayeredValueTable::getValue(StringRef name) { // Convert it to an LLVM value // should support FunctionDecl,VarDecl,EnumConstantDecl - return codegenCValue(store->u.cValueDecl); + GenRet ret = codegenCValue(store->u.cValueDecl); + store->u.value = ret.val; + store->isLVPtr = ret.isLVPtr; + store->isUnsigned = ret.isUnsigned; + return ret; } if( store->u.chplVar && isVarSymbol(store->u.chplVar) ) { VarSymbol* var = store->u.chplVar; GenRet ret = var; // code generate it! + store->u.value = ret.val; + store->isLVPtr = ret.isLVPtr; + store->isUnsigned = ret.isUnsigned; return ret; } } @@ -1746,7 +1810,8 @@ llvm::Type *LayeredValueTable::getType(StringRef name) { // maybe TypedefDecl,EnumDecl,RecordDecl // Convert it to an LLVM type. 
- return codegenCType(store->u.cTypeDecl); + store->u.type = codegenCType(store->u.cTypeDecl); + return store->u.type; } } return NULL; @@ -1837,6 +1902,12 @@ void LayeredValueTable::swap(LayeredValueTable* other) int getCRecordMemberGEP(const char* typeName, const char* fieldName) { GenInfo* info = gGenInfo; + INT_ASSERT(info); + ClangInfo* clangInfo = info->clangInfo; + INT_ASSERT(clangInfo); + clang::CodeGenerator* cCodeGen = clangInfo->cCodeGen; + INT_ASSERT(cCodeGen); + TypeDecl* d = NULL; int ret; @@ -1865,7 +1936,8 @@ int getCRecordMemberGEP(const char* typeName, const char* fieldName) } } INT_ASSERT(field); - ret=info->cgBuilder->getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field); + // TODO: should be in clang::CodeGenerator API + ret = cCodeGen->CGM().getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field); return ret; } @@ -1889,7 +1961,7 @@ void addGlobalToWide(const PassManagerBuilder &Builder, LEGACY_PASS_MANAGER &PM) { GenInfo* info = gGenInfo; if( fLLVMWideOpt ) { - PM.add(createGlobalToWide(&info->globalToWideInfo, info->targetLayout)); + PM.add(createGlobalToWide(&info->globalToWideInfo, info->clangInfo->asmTargetLayoutStr)); } } @@ -1930,6 +2002,7 @@ bool getIrDumpExtensionPoint(llvmStageNum_t s, case llvmStageNum::NONE: case llvmStageNum::BASIC: case llvmStageNum::FULL: + case llvmStageNum::EVERY: case llvmStageNum::LAST: return false; } @@ -1959,14 +2032,6 @@ void setupForGlobalToWide(void) { info->nodeIdType = ginfo->lvt->getType("c_nodeid_t"); assert(info->nodeIdType); - info->addrFn = getFunctionLLVM("chpl_wide_ptr_get_address"); - INT_ASSERT(info->addrFn); - info->locFn = getFunctionLLVM("chpl_wide_ptr_read_localeID"); - INT_ASSERT(info->locFn); - info->nodeFn = getFunctionLLVM("chpl_wide_ptr_get_node"); - INT_ASSERT(info->nodeFn); - info->makeFn = getFunctionLLVM("chpl_return_wide_ptr_loc_ptr"); - INT_ASSERT(info->makeFn); info->getFn = getFunctionLLVM("chpl_gen_comm_get_ctl"); INT_ASSERT(info->getFn); info->putFn = 
getFunctionLLVM("chpl_gen_comm_put_ctl"); @@ -2004,8 +2069,7 @@ void setupForGlobalToWide(void) { llvm::BasicBlock::Create(ginfo->module->getContext(), "entry", fn); ginfo->builder->SetInsertPoint(block); - llvm::Constant* fns[] = {info->addrFn, info->locFn, info->nodeFn, - info->makeFn, info->getFn, info->putFn, + llvm::Constant* fns[] = {info->getFn, info->putFn, info->getPutFn, info->memsetFn, NULL}; llvm::Value* ret = llvm::Constant::getNullValue(retType); @@ -2029,6 +2093,62 @@ void setupForGlobalToWide(void) { info->preservingFn = fn; } +static +void adjustLayoutForGlobalToWide() +{ + if( ! fLLVMWideOpt ) return; + + GenInfo* info = gGenInfo; + INT_ASSERT(info); + ClangInfo* clangInfo = info->clangInfo; + INT_ASSERT(clangInfo); + + if (clangInfo->parseOnly) return; + + std::string layout = clangInfo->asmTargetLayoutStr; + + INT_ASSERT(layout != ""); + + char buf[200]; //needs to store up to 8 32-bit numbers in decimal + + // Add global pointer info to layout. + snprintf(buf, sizeof(buf), "-p%u:%u:%u:%u-p%u:%u:%u:%u" + /*"-ni:%u:%u"*/ /* non-integral pointers */, + GLOBAL_PTR_SPACE, + GLOBAL_PTR_SIZE, GLOBAL_PTR_ABI_ALIGN, GLOBAL_PTR_PREF_ALIGN, + WIDE_PTR_SPACE, GLOBAL_PTR_SIZE, GLOBAL_PTR_ABI_ALIGN, + GLOBAL_PTR_PREF_ALIGN /*, GLOBAL_PTR_SPACE, WIDE_PTR_SPACE*/); + layout += buf; + // Save the global address space we are using in info. + info->globalToWideInfo.globalSpace = GLOBAL_PTR_SPACE; + info->globalToWideInfo.wideSpace = WIDE_PTR_SPACE; + info->globalToWideInfo.globalPtrBits = GLOBAL_PTR_SIZE; + + // Always set the module layout. This works around an apparent bug in + // clang or LLVM (trivial/deitz/test_array_low.chpl would print out the + // wrong answer because some i64s were stored at the wrong alignment). + info->module->setDataLayout(layout); + + checkAdjustedDataLayout(); +} + +void checkAdjustedDataLayout() { + + if( ! 
fLLVMWideOpt ) return; + + GenInfo* info = gGenInfo; + INT_ASSERT(info); + ClangInfo* clangInfo = info->clangInfo; + INT_ASSERT(clangInfo); + + if (clangInfo->parseOnly) return; + + // Check that the data layout setting worked + const llvm::DataLayout& dl = info->module->getDataLayout(); + llvm::Type* testTy = llvm::Type::getInt8PtrTy(info->module->getContext(), + GLOBAL_PTR_SPACE); + INT_ASSERT(dl.getTypeSizeInBits(testTy) == GLOBAL_PTR_SIZE); +} void makeBinaryLLVM(void) { @@ -2039,6 +2159,9 @@ void makeBinaryLLVM(void) { #endif GenInfo* info = gGenInfo; + INT_ASSERT(info); + ClangInfo* clangInfo = info->clangInfo; + INT_ASSERT(clangInfo); std::string moduleFilename = genIntermediateFilename("chpl__module.o"); std::string preOptFilename = genIntermediateFilename("chpl__module-nopt.bc"); @@ -2058,30 +2181,16 @@ void makeBinaryLLVM(void) { output.os().flush(); } -#if HAVE_LLVM_VER >= 39 - std::error_code Error; -#else - tool_output_file output (moduleFilename.c_str(), - errorInfo, -#if HAVE_LLVM_VER >= 34 - sys::fs::F_None -#else - raw_fd_ostream::F_Binary -#endif - ); -#endif - - static bool addedGlobalExts = false; - if( ! addedGlobalExts ) { - // Note, these global extensions currently only apply - // to the module-level optimization (not the "basic" function - // optimization we do immediately after generating LLVM IR). + // Open the output file + std::error_code error; + llvm::sys::fs::OpenFlags flags = llvm::sys::fs::F_None; - // Add the Global to Wide optimization if necessary. 
- PassManagerBuilder::addGlobalExtension(PassManagerBuilder::EP_ScalarOptimizerLate, addAggregateGlobalOps); - PassManagerBuilder::addGlobalExtension(PassManagerBuilder::EP_ScalarOptimizerLate, addGlobalToWide); - PassManagerBuilder::addGlobalExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, addGlobalToWide); + llvm::raw_fd_ostream outputOfile(moduleFilename, error, flags); + if (error || outputOfile.has_error()) + USR_FATAL("Could not open output file %s", moduleFilename.c_str()); + static bool addedGlobalExts = false; + if( ! addedGlobalExts ) { // Add IR dumping pass if necessary // point is initialized to a dummy value; it is set // in getIrDumpExtensionPoint. @@ -2093,78 +2202,112 @@ void makeBinaryLLVM(void) { PassManagerBuilder::addGlobalExtension(point, addDumpIrPass); } + if (llvmPrintIrStageNum == llvmStageNum::EVERY) { + printf("Adding IR dump extensions for all phases\n"); + for (int i = 0; i < llvmStageNum::LAST; i++) { + llvmStageNum::llvmStageNum_t stage = (llvmStageNum::llvmStageNum_t) i; + if (getIrDumpExtensionPoint(stage, point)) + PassManagerBuilder::addGlobalExtension( + point, + [stage] (const PassManagerBuilder &Builder, + LEGACY_PASS_MANAGER &PM) -> void { + PM.add(createDumpIrPass(stage)); + }); + } + + // Put the print-stage-num back + llvmPrintIrStageNum = llvmStageNum::EVERY; + } + addedGlobalExts = true; } - // Set llvm options - if (llvmFlags != "") { - //split llvmFlags by spaces - std::stringstream argsStream(llvmFlags); - std::vector vec; - std::string arg; - while(argsStream >> arg) - vec.push_back(arg); + // Create PassManager and run optimizations + PassManagerBuilder PMBuilder; - std::vector Args; - Args.push_back("chpl-llvm-opts"); - for (auto & i : vec) { - Args.push_back(i.c_str()); + configurePMBuilder(PMBuilder); + + // Note, these global extensions currently only apply + // to the module-level optimization (not the "basic" function + // optimization we do immediately after generating LLVM IR). 
+ + // Add the Global to Wide optimization if necessary. + if (fLLVMWideOpt) { + PMBuilder.addExtension(PassManagerBuilder::EP_ScalarOptimizerLate, addAggregateGlobalOps); + PMBuilder.addExtension(PassManagerBuilder::EP_ScalarOptimizerLate, addGlobalToWide); + PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, addGlobalToWide); + } + + // Setup for and run LLVM optimization passes + { + adjustLayoutForGlobalToWide(); + + llvm::legacy::PassManager mpm; + + // Add the TransformInfo pass + mpm.add(createTargetTransformInfoWrapperPass( + info->targetMachine->getTargetIRAnalysis())); + + // Add the TargetLibraryInfo pass + Triple TargetTriple(info->module->getTargetTriple()); + llvm::TargetLibraryInfoImpl TLII(TargetTriple); + mpm.add(new TargetLibraryInfoWrapperPass(TLII)); + + PMBuilder.populateModulePassManager(mpm); + + if (fLLVMWideOpt) { + // the GlobalToWide pass creates calls to inline functions, among + // other things, that will need to be optimized. So run an additional + // battery of optimizations now. + + PassManagerBuilder PMBuilder2; + + configurePMBuilder(PMBuilder2, /* opt level */ 1); + + PMBuilder2.populateModulePassManager(mpm); } - Args.push_back(NULL); - llvm::cl::ParseCommandLineOptions(Args.size()-1, &Args[0]); + // Run the optimizations now! + mpm.run(*info->module); + + // Reset the data layout. + info->module->setDataLayout(clangInfo->asmTargetLayoutStr); } - // Note that EmitBackendOutput, when creating a .bc file, - // does *not* run vectorization. We confirmed this with clang 3.7 - // with --save-temps (the resulting .bc file does not contain vector IR - // but the resulting .o file has vectorized loops). - // - // Note, as of LLVM/clang 4.0, we can call EmitBitcode - // and have a simpler story here... 
- EmitBackendOutput(*info->Diags, -#if HAVE_LLVM_VER >= 40 - info->Clang->getHeaderSearchOpts(), -#endif - info->codegenOptions, - info->clangTargetOptions, info->clangLangOptions, -#if HAVE_LLVM_VER >= 39 - info->Ctx->getTargetInfo().getDataLayout(), -#elif HAVE_LLVM_VER >= 38 - info->Ctx->getTargetInfo().getDataLayoutString(), -#else -#if HAVE_LLVM_VER >= 35 - info->Ctx->getTargetInfo().getTargetDescription(), -#endif -#endif - info->module, Backend_EmitObj, -#if HAVE_LLVM_VER >= 39 - llvm::make_unique( - moduleFilename, - Error, - llvm::sys::fs::F_None) -#else - &output.os() + // Handle --llvm-print-ir-stage=full +#ifdef HAVE_LLVM + if((llvmStageNum::FULL == llvmPrintIrStageNum || + llvmStageNum::EVERY == llvmPrintIrStageNum) && llvmPrintIrCName != NULL) + printLlvmIr(getFunctionLLVM(llvmPrintIrCName), llvmStageNum::FULL); #endif - ); -#if HAVE_LLVM_VER >= 39 - if (Error) - USR_FATAL("Could not create temporary .bc file"); -#endif + // Emit the .o file for linking with clang + // Setup and run LLVM passes to emit a .o file to outputOfile + { + llvm::legacy::PassManager emitPM; -#if HAVE_LLVM_VER <= 38 - output.keep(); - output.os().flush(); -#endif + emitPM.add(createTargetTransformInfoWrapperPass( + info->targetMachine->getTargetIRAnalysis())); + + llvm::TargetMachine::CodeGenFileType FileType = + llvm::TargetMachine::CGFT_ObjectFile; + bool disableVerify = ! developer; + info->targetMachine->addPassesToEmitFile(emitPM, outputOfile, + FileType, + disableVerify); + + // Run the passes to emit the .o file now! + emitPM.run(*info->module); + outputOfile.close(); + } //finishClang is before the call to the debug finalize - deleteClang(info); + deleteClang(clangInfo); std::string options = ""; std::string home(CHPL_HOME); - std::string compileline = info->compileline; + std::string compileline = clangInfo->compileline; compileline += " --llvm" " --clang" " --main.o" @@ -2181,9 +2324,9 @@ void makeBinaryLLVM(void) { // Gather C flags for compiling C files. 
std::string cargs; - for( size_t i = 0; i < info->clangCCArgs.size(); ++i ) { + for( size_t i = 0; i < clangInfo->clangCCArgs.size(); ++i ) { cargs += " "; - cargs += info->clangCCArgs[i]; + cargs += clangInfo->clangCCArgs[i]; } // Compile any C files. @@ -2229,9 +2372,9 @@ void makeBinaryLLVM(void) { options += " -g"; } - for( size_t i = 0; i < info->clangLDArgs.size(); ++i ) { + for( size_t i = 0; i < clangInfo->clangLDArgs.size(); ++i ) { options += " "; - options += info->clangLDArgs[i].c_str(); + options += clangInfo->clangLDArgs[i].c_str(); } // note: currently ldflags are not stored into clangLDArgs. @@ -2299,10 +2442,6 @@ void makeBinaryLLVM(void) { mysystem(makecmd, "Make Binary - Building Launcher and Copying"); -#ifdef HAVE_LLVM - if(llvmStageNum::FULL == llvmPrintIrStageNum && llvmPrintIrCName != NULL) - printLlvmIr(getFunctionLLVM(llvmPrintIrCName), llvmStageNum::FULL); -#endif } #endif diff --git a/compiler/util/llvm-global-to-wide/CMakeLists.txt b/compiler/util/llvm-global-to-wide/CMakeLists.txt new file mode 100644 index 000000000000..600760c81eaf --- /dev/null +++ b/compiler/util/llvm-global-to-wide/CMakeLists.txt @@ -0,0 +1,88 @@ +cmake_minimum_required(VERSION 3.4) + + +# Detect LLVM +# The user is supposed to set this to a valid llvm 3.7 install root +set(LLVM_ROOT "" CACHE PATH "Root of LLVM install.") +# and source tree +# (we use lit.py from the LLVM source tree) +set(LLVM_SRC "" CACHE PATH "Root of LLVM source tree.") + +set(LLVM_LIT "${LLVM_SRC}/utils/lit/lit.py") + +# sanity check LLVM install path +if(NOT EXISTS "${LLVM_ROOT}/include/llvm" ) + message(FATAL_ERROR + "LLVM_ROOT (${LLVM_ROOT}) is invalid") +endif() + + +# sanity check LLVM src path +if(NOT EXISTS "${LLVM_SRC}/lib/Transforms" ) + message(FATAL_ERROR + "LLVM_SRC (${LLVM_SRC}) is invalid") +endif() + +# find Python +find_package(PythonInterp) + +# +#set( LLVM_DIR "${LLVM_ROOT}/share/llvm/cmake" ) +#set( CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${LLVM_DIR} ) +#find_package(LLVM) 
+#include(AddLLVM) +#add_definitions(${LLVM_DEFINITIONS}) +#include_directories(${LLVM_INCLUDE_DIRS}) +#link_directories(${LLVM_LIBRARY_DIRS}) +# Load various LLVM config stuff, +# see http://llvm.org/docs/CMake.html#developing-llvm-passes-out-of-source + +# Load LLVM CMake config +#list(APPEND CMAKE_PREFIX_PATH "${LLVM_ROOT}/share/llvm/cmake") +#set( LLVM_DIR "${LLVM_ROOT}/share/llvm/cmake" ) +#set( CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${LLVM_DIR} ) +find_package(LLVM REQUIRED CONFIG) + +list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") +include(HandleLLVMOptions) # load additional config +include(AddLLVM) # used to add our own modules + +# propagate LLVM-specific variables to this project +add_definitions(${LLVM_DEFINITIONS} -DHAVE_LLVM) +include_directories(${LLVM_INCLUDE_DIRS}) +# See commit r197394, needed by add_llvm_module in llvm/CMakeLists.txt +set(LLVM_RUNTIME_OUTPUT_INTDIR "${CMAKE_BINARY_DIR}/bin/${CMAKE_CFG_INT_DIR}") +set(LLVM_LIBRARY_OUTPUT_INTDIR "${CMAKE_BINARY_DIR}/lib/${CMAKE_CFG_INT_DIR}") + +set(SOURCES + llvmAggregateGlobalOps.cpp + llvmGlobalToWide.cpp + llvmUtil.cpp + ) + +add_llvm_loadable_module( llvm-pgas ${SOURCES} ) +#set_target_properties( llvm-pgas PROPERTIES COMPILE_FLAGS "-fno-rtti" ) + +# set various configuration settings in the test suite +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/test/lit.cfg.in" "${CMAKE_CURRENT_BINARY_DIR}/test/lit.cfg") + +file( GLOB_RECURSE test_files RELATIVE + "${CMAKE_CURRENT_SOURCE_DIR}/" "test/*.ll" ) +foreach( test_file ${test_files} ) + # test_file is e.g. 
test/a.ll + # copy the test to the build directory + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${test_file}" + "${CMAKE_CURRENT_BINARY_DIR}/${test_file}" COPYONLY) +endforeach( test_file ) + +if(NOT EXISTS ${LLVM_ROOT}/bin/FileCheck) + message(FATAL_ERROR "need FileCheck installed to run tests; configure LLVM with -DLLVM_INSTALL_UTILS") +endif() + +# support make check with the LLVM tester lit in the tests directory +add_custom_target(check + COMMAND ${PYTHON_EXECUTABLE} ${LLVM_LIT} + "${CMAKE_CURRENT_BINARY_DIR}/test/" -v + DEPENDS llvm-pgas +) + diff --git a/compiler/util/llvm-global-to-wide/RUN_BUILD b/compiler/util/llvm-global-to-wide/RUN_BUILD new file mode 100755 index 000000000000..e14d14606a61 --- /dev/null +++ b/compiler/util/llvm-global-to-wide/RUN_BUILD @@ -0,0 +1,3 @@ +#!/bin/sh +cd build +make VERBOSE=1 diff --git a/compiler/util/llvm-global-to-wide/RUN_CONFIG b/compiler/util/llvm-global-to-wide/RUN_CONFIG index 12b9e43ce2f8..e42175ad4a62 100755 --- a/compiler/util/llvm-global-to-wide/RUN_CONFIG +++ b/compiler/util/llvm-global-to-wide/RUN_CONFIG @@ -1,12 +1,14 @@ #!/bin/sh #./configure --with-llvmsrc=../../../third-party/llvm/llvm/ --with-llvmobj=../../../third-party/llvm/build/linux64-gnu/ --enable-shared -autoconf -./configure +#autoconf +#./configure ln -s ../llvmGlobalToWide.cpp llvmGlobalToWide.cpp ln -s ../llvmUtil.cpp llvmUtil.cpp ln -s ../llvmAggregateGlobalOps.cpp llvmAggregateGlobalOps.cpp -cd include -ln -s ../../../include/llvmGlobalToWide.h llvmGlobalToWide.h -ln -s ../../../include/llvmUtil.h llvmUtil.h -ln -s ../../../include/llvmAggregateGlobalOps.h llvmAggregateGlobalOps.h -cd .. +ln -s ../../include/llvmGlobalToWide.h llvmGlobalToWide.h +ln -s ../../include/llvmUtil.h llvmUtil.h +ln -s ../../include/llvmAggregateGlobalOps.h llvmAggregateGlobalOps.h +mkdir -p build +cd build +export CMAKE_PREFIX_PATH=$CHPL_HOME/third-party/llvm/install/linux64-gnu/ +cmake .. 
-DLLVM_ROOT=$CHPL_HOME/third-party/llvm/install/linux64-gnu/ -DLLVM_SRC=$CHPL_HOME/third-party/llvm/llvm diff --git a/compiler/util/llvm-global-to-wide/RUN_TEST b/compiler/util/llvm-global-to-wide/RUN_TEST index 67bdeb8e04dc..14286a3d87c5 100755 --- a/compiler/util/llvm-global-to-wide/RUN_TEST +++ b/compiler/util/llvm-global-to-wide/RUN_TEST @@ -1,3 +1,3 @@ #!/bin/sh -../../../third-party/llvm/llvm/utils/lit/lit.py test/ -v - +cd build +make check diff --git a/compiler/util/llvm-global-to-wide/test/a.ll b/compiler/util/llvm-global-to-wide/test/a.ll index 42c443113cc2..8da4d9fb9e3e 100644 --- a/compiler/util/llvm-global-to-wide/test/a.ll +++ b/compiler/util/llvm-global-to-wide/test/a.ll @@ -1,13 +1,13 @@ -; RUN: opt --load libglobal-to-wide.so -global-to-wide -S < %s | FileCheck %s +; RUN: opt --load %bindir/lib/llvm-pgas${MOD_EXT} -global-to-wide -S < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:64:64:64" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:128:64:64-p101:128:64:64" declare void @llvm.memcpy.p0i8.p100i8.i64(i8* nocapture, i8 addrspace(100)* nocapture, i64, i32, i1) declare void @llvm.memcpy.p100i8.p0i8.i64(i8 addrspace(100)* nocapture, i8* nocapture, i64, i32, i1) declare void @llvm.memcpy.p100i8.p100i8.i64(i8 addrspace(100)* nocapture, i8 addrspace(100)* nocapture, i64, i32, i1) define void @teststore(i64 addrspace(100)* %storeme) { -; CHECK: @teststore(i64* % +; CHECK: @teststore({ %struct.c_localeid_t, i64* } % ; ) ; CHECK-NOT: @memcpy ; CHECK: put @@ -24,7 +24,7 @@ entry: define i64 @testload(i64 addrspace(100)* %loadme) { -; CHECK: @testload(i64* % +; CHECK: @testload({ %struct.c_localeid_t, i64* } % ; ) ; CHECK-NOT: @memcpy ; CHECK: get @@ -35,12 +35,12 @@ entry: %loadme.cast = 
bitcast i64 addrspace(100)* %loadme to i8 addrspace(100)* %a.cast = bitcast i64 * %a to i8 * call void @llvm.memcpy.p0i8.p100i8.i64(i8* %a.cast, i8 addrspace(100)* %loadme.cast, i64 8, i32 1, i1 true) - %ret = load i64 * %a + %ret = load i64, i64 * %a ret i64 %ret } define void @testcopy(i64 addrspace(100)* %dst, i64 addrspace(100)* %src) { -; CHECK: @testcopy(i64* % +; CHECK: @testcopy({ %struct.c_localeid_t, i64* } % ; ) ; CHECK-NOT: @memcpy ; CHECK: getput @@ -54,19 +54,19 @@ entry: } define i64 @read_int(i64 addrspace(100)* %src) { -; CHECK: i64 @read_int(i64* % +; CHECK: i64 @read_int({ %struct.c_localeid_t, i64* } % ; ) ; CHECK-NOT: @.gf ; CHECK: get ; CHECK-NOT: @.gf ; CHECK: ret i64 entry: - %ret = load i64 addrspace(100)* %src + %ret = load i64, i64 addrspace(100)* %src ret i64 %ret } define void @write_int(i64 addrspace(100)* %dst, i64 %v) { -; CHECK: void @write_int(i64* % +; CHECK: void @write_int({ %struct.c_localeid_t, i64* } % ; ) ; CHECK-NOT: @.gf ; CHECK: put diff --git a/compiler/util/llvm-global-to-wide/test/aggregate.ll b/compiler/util/llvm-global-to-wide/test/aggregate.ll index b04b2784bcf3..1e8779017b25 100644 --- a/compiler/util/llvm-global-to-wide/test/aggregate.ll +++ b/compiler/util/llvm-global-to-wide/test/aggregate.ll @@ -1,6 +1,6 @@ -; RUN: opt --load libglobal-to-wide.so -aggregate-global-ops -S < %s | FileCheck %s +; RUN: opt --load %bindir/lib/llvm-pgas${MOD_EXT} -aggregate-global-ops -S < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:64:64:64" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:128:64:64-p101:128:64:64" declare void @llvm.memcpy.p0i8.p100i8.i64(i8* nocapture, i8 addrspace(100)* nocapture, i64, i32, i1) declare void @llvm.memcpy.p100i8.p0i8.i64(i8 
addrspace(100)* nocapture, i8* nocapture, i64, i32, i1) @@ -11,9 +11,9 @@ define void @teststore1(i64 addrspace(100)* %base) { ; CHECK: @teststore1( ; ) entry: - %p0 = getelementptr inbounds i64 addrspace(100)* %base, i32 0 - %p1 = getelementptr inbounds i64 addrspace(100)* %base, i32 1 - %p2 = getelementptr inbounds i64 addrspace(100)* %base, i32 2 + %p0 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 0 + %p1 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 1 + %p2 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 2 ; CHECK: store i64 1 ; CHECK: store i64 2 ; CHECK: store i64 3 @@ -34,11 +34,11 @@ entry: ; CHECK: store i64 3 ; CHECK: memcpy ; CHECK: ret - %p0 = getelementptr inbounds i64 addrspace(100)* %base, i32 0 + %p0 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 0 store i64 1, i64 addrspace(100)* %p0, align 8 - %p1 = getelementptr inbounds i64 addrspace(100)* %base, i32 1 + %p1 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 1 store i64 2, i64 addrspace(100)* %p1, align 8 - %p2 = getelementptr inbounds i64 addrspace(100)* %base, i32 2 + %p2 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 2 store i64 3, i64 addrspace(100)* %p2, align 8 ret void } @@ -47,9 +47,9 @@ define void @teststore3(i64 addrspace(100)* %base) { ; CHECK: @teststore3( ; ) entry: - %p2 = getelementptr inbounds i64 addrspace(100)* %base, i32 2 - %p1 = getelementptr inbounds i64 addrspace(100)* %base, i32 1 - %p0 = getelementptr inbounds i64 addrspace(100)* %base, i32 0 + %p2 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 2 + %p1 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 1 + %p0 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 0 ; CHECK: store i64 3 ; CHECK: store i64 2 ; CHECK: store i64 1 @@ -71,12 +71,12 @@ define i64 @testload1(i64 addrspace(100)* %base) { ; CHECK: load ; CHECK: ret entry: - %p0 = getelementptr inbounds i64 addrspace(100)* %base, i32 0 - %p1 = 
getelementptr inbounds i64 addrspace(100)* %base, i32 1 - %p2 = getelementptr inbounds i64 addrspace(100)* %base, i32 2 - %v1 = load i64 addrspace(100)* %p0, align 8 - %v2 = load i64 addrspace(100)* %p1, align 8 - %v3 = load i64 addrspace(100)* %p2, align 8 + %p0 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 0 + %p1 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 1 + %p2 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 2 + %v1 = load i64, i64 addrspace(100)* %p0, align 8 + %v2 = load i64, i64 addrspace(100)* %p1, align 8 + %v3 = load i64, i64 addrspace(100)* %p2, align 8 %sum1 = add i64 %v1, %v2 %sum2 = add i64 %sum1, %v3 ret i64 %sum2 @@ -91,12 +91,12 @@ define i64 @testload2(i64 addrspace(100)* %base) { ; CHECK: load ; CHECK: ret entry: - %p0 = getelementptr inbounds i64 addrspace(100)* %base, i32 0 - %v1 = load i64 addrspace(100)* %p0, align 8 - %p1 = getelementptr inbounds i64 addrspace(100)* %base, i32 1 - %v2 = load i64 addrspace(100)* %p1, align 8 - %p2 = getelementptr inbounds i64 addrspace(100)* %base, i32 2 - %v3 = load i64 addrspace(100)* %p2, align 8 + %p0 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 0 + %v1 = load i64, i64 addrspace(100)* %p0, align 8 + %p1 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 1 + %v2 = load i64, i64 addrspace(100)* %p1, align 8 + %p2 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 2 + %v3 = load i64, i64 addrspace(100)* %p2, align 8 %sum1 = add i64 %v1, %v2 %sum2 = add i64 %sum1, %v3 ret i64 %sum2 @@ -111,12 +111,12 @@ define i64 @testload3(i64 addrspace(100)* %base) { ; CHECK: load ; CHECK: ret entry: - %p0 = getelementptr inbounds i64 addrspace(100)* %base, i32 0 - %v1 = load i64 addrspace(100)* %p0, align 8 - %p1 = getelementptr inbounds i64 addrspace(100)* %base, i32 4 - %v2 = load i64 addrspace(100)* %p1, align 8 - %p2 = getelementptr inbounds i64 addrspace(100)* %base, i32 8 - %v3 = load i64 addrspace(100)* %p2, align 8 + %p0 = 
getelementptr inbounds i64, i64 addrspace(100)* %base, i32 0 + %v1 = load i64, i64 addrspace(100)* %p0, align 8 + %p1 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 4 + %v2 = load i64, i64 addrspace(100)* %p1, align 8 + %p2 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 8 + %v3 = load i64, i64 addrspace(100)* %p2, align 8 %sum1 = add i64 %v1, %v2 %sum2 = add i64 %sum1, %v3 ret i64 %sum2 @@ -131,13 +131,13 @@ define i64 @testload4(i64 addrspace(100)* %base) { ; CHECK: load ; CHECK: ret entry: - %p0 = getelementptr inbounds i64 addrspace(100)* %base, i32 0 - %v1 = load i64 addrspace(100)* %p0, align 8 - %p1 = getelementptr inbounds i64 addrspace(100)* %base, i32 4 - %v2 = load i64 addrspace(100)* %p1, align 8 + %p0 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 0 + %v1 = load i64, i64 addrspace(100)* %p0, align 8 + %p1 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 4 + %v2 = load i64, i64 addrspace(100)* %p1, align 8 %sum1 = add i64 %v1, %v2 - %p2 = getelementptr inbounds i64 addrspace(100)* %base, i32 8 - %v3 = load i64 addrspace(100)* %p2, align 8 + %p2 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 8 + %v3 = load i64, i64 addrspace(100)* %p2, align 8 %sum2 = add i64 %sum1, %v3 ret i64 %sum2 } @@ -151,13 +151,13 @@ define i64 @testload5(i64 addrspace(100)* %base) { ; CHECK: load ; CHECK: ret entry: - %p0 = getelementptr inbounds i64 addrspace(100)* %base, i32 3 - %v1 = load i64 addrspace(100)* %p0, align 8 - %p1 = getelementptr inbounds i64 addrspace(100)* %base, i32 1 - %v2 = load i64 addrspace(100)* %p1, align 8 + %p0 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 3 + %v1 = load i64, i64 addrspace(100)* %p0, align 8 + %p1 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 1 + %v2 = load i64, i64 addrspace(100)* %p1, align 8 %sum1 = add i64 %v1, %v2 - %p2 = getelementptr inbounds i64 addrspace(100)* %base, i32 2 - %v3 = load i64 addrspace(100)* %p2, align 8 + %p2 = 
getelementptr inbounds i64, i64 addrspace(100)* %base, i32 2 + %v3 = load i64, i64 addrspace(100)* %p2, align 8 %sum2 = add i64 %sum1, %v3 ret i64 %sum2 } @@ -166,9 +166,9 @@ define void @teststoreatomic(i64 addrspace(100)* %base) { ; CHECK: @teststoreatomic( ; ) entry: - %p0 = getelementptr inbounds i64 addrspace(100)* %base, i32 0 - %p1 = getelementptr inbounds i64 addrspace(100)* %base, i32 1 - %p2 = getelementptr inbounds i64 addrspace(100)* %base, i32 2 + %p0 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 0 + %p1 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 1 + %p2 = getelementptr inbounds i64, i64 addrspace(100)* %base, i32 2 ; CHECK: store atomic i64 1 ; CHECK: store atomic i64 2 ; CHECK: store atomic i64 3 diff --git a/compiler/util/llvm-global-to-wide/test/b.ll b/compiler/util/llvm-global-to-wide/test/b.ll index 005bede753b1..535ff570b2c4 100644 --- a/compiler/util/llvm-global-to-wide/test/b.ll +++ b/compiler/util/llvm-global-to-wide/test/b.ll @@ -1,6 +1,6 @@ -; RUN: opt --load libglobal-to-wide.so -global-to-wide -S < %s | FileCheck %s +; RUN: opt --load %bindir/lib/llvm-pgas${MOD_EXT} -global-to-wide -S < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:64:64:64" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:128:64:64-p101:128:64:64" ; Check wide pointer manipulation functions. 
%struct.c_localeid_t = type { i32, i32 } @@ -11,48 +11,51 @@ declare %struct.c_localeid_t @.gf.loc.1(i64 addrspace(100)*) readnone declare i64 addrspace(100)* @.gf.make.1(%struct.c_localeid_t, i64*) readnone define i64* @testaddr(i64 addrspace(100)* %w) { -; CHECK: @testaddr(i64* % +; CHECK: @testaddr({ %struct.c_localeid_t, i64* } % ; ) ; CHECK-NOT: @.gf -; CHECK: wide_ptr_get_address +; CHECK: %ret = extractvalue { %struct.c_localeid_t, i64* } %w, 1 ; CHECK-NOT: @.gf -; CHECK: ret i64* +; CHECK: ret i64* %ret entry: %ret = call i64* @.gf.addr.1(i64 addrspace(100)* %w) ret i64* %ret } define i32 @testnode(i64 addrspace(100)* %w) { -; CHECK: @testnode(i64* % +; CHECK: @testnode({ %struct.c_localeid_t, i64* } % ; ) ; CHECK-NOT: @.gf -; CHECK: wide_ptr_get_node +; CHECK: extractvalue { %struct.c_localeid_t, i64* } %w, 0, 0 ; CHECK-NOT: @.gf -; CHECK: ret i32 +; CHECK: ret i32 %ret entry: %ret = call i32 @.gf.node.1(i64 addrspace(100)* %w) ret i32 %ret } define %struct.c_localeid_t @testloc(i64 addrspace(100)* %w) { -; CHECK: @testloc(i64* % +; CHECK: @testloc({ %struct.c_localeid_t, i64* } % ; ) ; CHECK-NOT: @.gf -; CHECK: wide_ptr_read_localeID +; CHECK: %ret = extractvalue { %struct.c_localeid_t, i64* } %w, 0 ; CHECK-NOT: @.gf -; CHECK: ret %struct.c_localeid_t +; CHECK: ret %struct.c_localeid_t %ret entry: %ret = call %struct.c_localeid_t @.gf.loc.1(i64 addrspace(100)* %w) ret %struct.c_localeid_t %ret } define i64 addrspace(100)* @testmake(%struct.c_localeid_t %loc, i64* %addr) { -; CHECK: i64* @testmake( +; CHECK: { %struct.c_localeid_t, i64* } @testmake( ; ) ; CHECK-NOT: @.gf -; CHECK: return_wide_ptr_loc +; CHECK: insertvalue { %struct.c_localeid_t, i64* } +; CHECK: %struct.c_localeid_t %loc, 0 +; CHECK: insertvalue { %struct.c_localeid_t, i64* } +; CHECK: i64* %addr ; CHECK-NOT: @.gf -; CHECK: ret i64* +; CHECK: ret { %struct.c_localeid_t, i64* } entry: %ret = call i64 addrspace(100)* @.gf.make.1(%struct.c_localeid_t %loc, i64* %addr) ret i64 addrspace(100)* 
%ret diff --git a/compiler/util/llvm-global-to-wide/test/c.ll b/compiler/util/llvm-global-to-wide/test/c.ll index 32357e7da3b3..0cdf6e9f54a4 100644 --- a/compiler/util/llvm-global-to-wide/test/c.ll +++ b/compiler/util/llvm-global-to-wide/test/c.ll @@ -1,6 +1,6 @@ -; RUN: opt --load libglobal-to-wide.so -global-to-wide -S < %s | FileCheck %s +; RUN: opt --load %bindir/lib/llvm-pgas${MOD_EXT} -global-to-wide -S < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:64:64:64" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:128:64:64-p101:128:64:64" ; Test type promotion %struct.c_localeid_t = type { i32, i32 } @@ -12,7 +12,7 @@ declare %struct.c_localeid_t @.gf.loc.1(i64 addrspace(100)*) readnone declare i64 addrspace(100)* @.gf.make.1(%struct.c_localeid_t, i64*) readnone %mystruct = type { i64 addrspace(100)*, i64 addrspace(100)*, i32 *} -; CHECK: %mystruct = type { i64*, i64*, i32* } +; CHECK: %mystruct = type { { %struct.c_localeid_t, i64* }, { %struct.c_localeid_t, i64* }, i32* } declare %mystruct* @.gf.addr.2(%mystruct addrspace(100)*) readnone declare i32 @.gf.node.2(%mystruct addrspace(100)*) readnone @@ -21,60 +21,60 @@ declare %mystruct addrspace(100)* @.gf.make.2(%struct.c_localeid_t, %mystruct*) define i64 addrspace(100)* @get_one(%mystruct addrspace(100)* %s) { -; CHECK: i64* @get_one(%mystruct* % +; CHECK: { %struct.c_localeid_t, i64* } @get_one({ %struct.c_localeid_t, %mystruct* } % ; ) ; CHECK-NOT: @.gf -; CHECK: ret i64* +; CHECK: ret { %struct.c_localeid_t, i64* } entry: - %gep = getelementptr inbounds %mystruct addrspace(100)* %s, i32 0, i32 0 - %ptr = load i64 addrspace(100)* addrspace(100)* %gep + %gep = getelementptr inbounds %mystruct, %mystruct addrspace(100)* %s, i32 0, i32 0 + %ptr = 
load i64 addrspace(100)*, i64 addrspace(100)* addrspace(100)* %gep ret i64 addrspace(100)* %ptr } define i64 addrspace(100)* @get_two(%mystruct addrspace(100)* %s) { -; CHECK: i64* @get_two(%mystruct* % +; CHECK: { %struct.c_localeid_t, i64* } @get_two({ %struct.c_localeid_t, %mystruct* } ; ) ; CHECK-NOT: @.gf -; CHECK: ret i64* +; CHECK: ret { %struct.c_localeid_t, i64* } entry: - %gep = getelementptr inbounds %mystruct addrspace(100)* %s, i32 0, i32 1 - %ptr = load i64 addrspace(100)* addrspace(100)* %gep + %gep = getelementptr inbounds %mystruct, %mystruct addrspace(100)* %s, i32 0, i32 1 + %ptr = load i64 addrspace(100)*, i64 addrspace(100)* addrspace(100)* %gep ret i64 addrspace(100)* %ptr } define i32* @get_three(%mystruct addrspace(100)* %s) { -; CHECK: i32* @get_three(%mystruct* % +; CHECK: i32* @get_three({ %struct.c_localeid_t, %mystruct* } % ; ) ; CHECK-NOT: @.gf ; CHECK: ret i32* entry: - %gep = getelementptr inbounds %mystruct addrspace(100)* %s, i32 0, i32 2 - %ptr = load i32* addrspace(100)* %gep + %gep = getelementptr inbounds %mystruct, %mystruct addrspace(100)* %s, i32 0, i32 2 + %ptr = load i32*, i32* addrspace(100)* %gep ret i32* %ptr } define i64 @read_int(%mystruct addrspace(100)* %s) { -; CHECK: i64 @read_int(%mystruct* % +; CHECK: i64 @read_int({ %struct.c_localeid_t, %mystruct* } % ; ) ; CHECK-NOT: @.gf ; CHECK: get ; CHECK-NOT: @.gf ; CHECK: ret i64 entry: - %gep = getelementptr inbounds %mystruct addrspace(100)* %s, i32 0, i32 0 - %ptr = load i64 addrspace(100)* addrspace(100)* %gep - %ret = load i64 addrspace(100)* %ptr + %gep = getelementptr inbounds %mystruct, %mystruct addrspace(100)* %s, i32 0, i32 0 + %ptr = load i64 addrspace(100)*, i64 addrspace(100)* addrspace(100)* %gep + %ret = load i64, i64 addrspace(100)* %ptr ret i64 %ret } define void @write_int(%mystruct addrspace(100)* %s, i64 %v) { -; CHECK: void @write_int(%mystruct* % +; CHECK: void @write_int({ %struct.c_localeid_t, %mystruct* } % ; ) ; CHECK-NOT: @.gf ; CHECK: put ; 
CHECK-NOT: @.gf ; CHECK: ret void entry: - %gep = getelementptr inbounds %mystruct addrspace(100)* %s, i32 0, i32 0 - %ptr = load i64 addrspace(100)* addrspace(100)* %gep + %gep = getelementptr inbounds %mystruct, %mystruct addrspace(100)* %s, i32 0, i32 0 + %ptr = load i64 addrspace(100)*, i64 addrspace(100)* addrspace(100)* %gep store i64 %v, i64 addrspace(100)* %ptr ret void } diff --git a/compiler/util/llvm-global-to-wide/test/d.ll b/compiler/util/llvm-global-to-wide/test/d.ll index 8a38531c0346..acf06e260bcb 100644 --- a/compiler/util/llvm-global-to-wide/test/d.ll +++ b/compiler/util/llvm-global-to-wide/test/d.ll @@ -1,6 +1,6 @@ -; RUN: opt --load libglobal-to-wide.so -global-to-wide -S < %s | FileCheck %s +; RUN: opt --load %bindir/lib/llvm-pgas${MOD_EXT} -global-to-wide -S < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:64:64:64" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:128:64:64-p101:128:64:64" ; Test recursive type promotion %struct.c_localeid_t = type { i32, i32 } @@ -10,11 +10,11 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ;%type_two = type opaque %recurs = type { %recurs addrspace(100)* } -; CHECK: %recurs = type { %recurs* } +; CHECK: %recurs = type { { %struct.c_localeid_t, %recurs* } } %type_one = type { %type_two addrspace(100)* } -; CHECK: %type_one = type { %type_two* } +; CHECK: %type_one = type { { %struct.c_localeid_t, %type_two* } } %type_two = type { %type_one addrspace(100)* } -; CHECK: %type_two = type { %type_one* } +; CHECK: %type_two = type { { %struct.c_localeid_t, %type_one* } } ; A function to keep those types from dissapearing define void @test(%recurs %a, %type_one %b, %type_two %c) { diff --git 
a/compiler/util/llvm-global-to-wide/test/e.ll b/compiler/util/llvm-global-to-wide/test/e.ll index f9a49dfca692..c59afd491d6f 100644 --- a/compiler/util/llvm-global-to-wide/test/e.ll +++ b/compiler/util/llvm-global-to-wide/test/e.ll @@ -1,6 +1,6 @@ -; RUN: opt --load libglobal-to-wide.so -global-to-wide -S < %s | FileCheck %s +; RUN: opt --load %bindir/lib/llvm-pgas${MOD_EXT} -global-to-wide -S < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:64:64:64" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:128:64:64-p101:128:64:64" ; try a phi node %struct.c_localeid_t = type { i32, i32 } @@ -12,7 +12,7 @@ declare %struct.c_localeid_t @.gf.loc.1(i64 addrspace(100)*) readnone declare i64 addrspace(100)* @.gf.make.1(%struct.c_localeid_t, i64*) readnone %mystruct = type { i64 addrspace(100)*, i64 addrspace(100)*, i32 *} -; CHECK: %mystruct = type { i64*, i64*, i32* } +; CHECK: %mystruct = type { { %struct.c_localeid_t, i64* }, { %struct.c_localeid_t, i64* }, i32* } declare %mystruct* @.gf.addr.2(%mystruct addrspace(100)*) readnone declare i32 @.gf.node.2(%mystruct addrspace(100)*) readnone @@ -55,7 +55,7 @@ entry: br label %G_loop G_loop: %in = phi %mystruct addrspace(100)* [ %s, %entry ], [ %ptr, %G_loop ] - %ptr = getelementptr inbounds %mystruct addrspace(100)* %in, i32 1 + %ptr = getelementptr inbounds %mystruct, %mystruct addrspace(100)* %in, i32 1 br i1 %a, label %G_loop, label %G_end G_end: ret void diff --git a/compiler/util/llvm-global-to-wide/test/f.ll b/compiler/util/llvm-global-to-wide/test/f.ll index fe525f380fec..471a72ced5a3 100644 --- a/compiler/util/llvm-global-to-wide/test/f.ll +++ b/compiler/util/llvm-global-to-wide/test/f.ll @@ -1,6 +1,6 @@ -; RUN: opt --load libglobal-to-wide.so 
-global-to-wide -S < %s | FileCheck %s +; RUN: opt --load %bindir/lib/llvm-pgas${MOD_EXT} -global-to-wide -S < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:64:64:64" +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:128:64:64-p101:128:64:64" ; try a phi node %struct.c_localeid_t = type { i32, i32 } @@ -12,7 +12,7 @@ declare %struct.c_localeid_t @.gf.loc.1(i64 addrspace(100)*) readnone declare i64 addrspace(100)* @.gf.make.1(%struct.c_localeid_t, i64*) readnone %mystruct = type { i64 addrspace(100)*, i64 addrspace(100)*, i32 *} -; CHECK: %mystruct = type { i64*, i64*, i32* } +; CHECK: %mystruct = type { { %struct.c_localeid_t, i64* }, { %struct.c_localeid_t, i64* }, i32* } declare %mystruct* @.gf.addr.2(%mystruct addrspace(100)*) readnone declare i32 @.gf.node.2(%mystruct addrspace(100)*) readnone diff --git a/compiler/util/llvm-global-to-wide/test/g.ll b/compiler/util/llvm-global-to-wide/test/g.ll new file mode 100644 index 000000000000..3c10cb84435e --- /dev/null +++ b/compiler/util/llvm-global-to-wide/test/g.ll @@ -0,0 +1,37 @@ +; RUN: opt --load %bindir/lib/llvm-pgas${MOD_EXT} -global-to-wide -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:128:64:64-p101:128:64:64" + +declare void @llvm.memcpy.p0i8.p100i8.i64(i8* nocapture, i8 addrspace(100)* nocapture, i64, i32, i1) +declare void @llvm.memcpy.p100i8.p0i8.i64(i8 addrspace(100)* nocapture, i8* nocapture, i64, i32, i1) +declare void @llvm.memcpy.p100i8.p100i8.i64(i8 addrspace(100)* nocapture, i8 addrspace(100)* nocapture, i64, i32, i1) + +define i128 @testptrtoint(double addrspace(100)* %ptr) { +; 
CHECK: i128 @testptrtoint( +; CHECK: { %struct.c_localeid_t, double* } % +; ) +; CHECK: alloca i128 +; CHECK: bitcast i128* % +; CHECK: to { %struct.c_localeid_t, double* }* +; CHECK: store { %struct.c_localeid_t, double* } %ptr +; CHECK: load i128, i128* % +; CHECK: ret i128 +entry: + %ret = ptrtoint double addrspace(100)* %ptr to i128 + ret i128 %ret +} + + +define double addrspace(100)* @testinttoptr(i128 %i) { +; CHECK: { %struct.c_localeid_t, double* } @testinttoptr(i128 % +; ) +; CHECK: alloca { %struct.c_localeid_t, double* } +; CHECK: bitcast { %struct.c_localeid_t, double* }* % +; CHECK: to i128* +; CHECK: store i128 % +; CHECK: load { %struct.c_localeid_t, double* }, { %struct.c_localeid_t, double* }* +; CHECK: ret { %struct.c_localeid_t, double* } +entry: + %ret = inttoptr i128 %i to double addrspace(100)* + ret double addrspace(100)* %ret +} diff --git a/compiler/util/llvm-global-to-wide/test/lit.cfg.in b/compiler/util/llvm-global-to-wide/test/lit.cfg.in new file mode 100644 index 000000000000..b92d43668cfd --- /dev/null +++ b/compiler/util/llvm-global-to-wide/test/lit.cfg.in @@ -0,0 +1,31 @@ +import os +import re +import platform +from lit import formats + +config.name = 'llvm-pgas' +config.test_source_root = os.path.join("@CMAKE_CURRENT_SOURCE_DIR@", 'test') + +# Configuration file for the 'lit' test runner. +config.test_format = formats.ShTest(True) + +# suffixes: A list of file extensions to treat as test files. This is overridden +# by individual lit.local.cfg files in the test subdirectories. +config.suffixes = ['.ll', '.c', '.cpp', '.test', '.txt', '.s'] + +# excludes: A list of directories to exclude from the testsuite. The 'Inputs' +# subdirectories contain auxiliary inputs for various tests in their parent +# directories. 
+config.excludes = ['CMakeCache.txt', 'CMakeFiles', 'CMakeLists.txt'] + +config.substitutions.append(('%bindir', "@CMAKE_BINARY_DIR@")) + + +# environment: Set PATH as required +config.environment['PATH'] = os.pathsep.join([os.path.join("@LLVM_ROOT@", "bin"), + config.environment['PATH']]) +config.environment['LLVM_ROOT'] = "@LLVM_ROOT@" +config.environment['CMAKE_SOURCE_DIR'] = "@CMAKE_SOURCE_DIR@" +config.environment['MOD_EXT'] = "@CMAKE_SHARED_LIBRARY_SUFFIX@" + +# vim:ft=python diff --git a/compiler/util/llvmAggregateGlobalOps.cpp b/compiler/util/llvmAggregateGlobalOps.cpp index 95c53402830a..5deec1c795b6 100644 --- a/compiler/util/llvmAggregateGlobalOps.cpp +++ b/compiler/util/llvmAggregateGlobalOps.cpp @@ -1,15 +1,15 @@ /* * Copyright 2004-2017 Cray Inc. * Other additional copyright holders may be indicated within. - * + * * The entirety of this work is licensed under the Apache License, * Version 2.0 (the "License"); you may not use this file except * in compliance with the License. - * + * * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -31,8 +31,8 @@ // memcpy(%tmp, %p, ...) // %i1 = getelementptr ... %tmp, ..., 1 // %i2 = getelementptr ... 
%tmp ..., 2 -// %v1 = load %i1 -// %v2 = load %i2 +// %v1 = load %i1 +// %v2 = load %i2 // // This optimization doesn't worry about combining such loads // or stores into memcpys or memsets since MemCpyOptimizer @@ -49,6 +49,7 @@ #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -77,6 +78,8 @@ #include #include +#include +#include using namespace llvm; @@ -84,9 +87,9 @@ namespace { static const bool DEBUG = false; -static const bool extraChecks = false; +static const bool extraChecks = true; // Set a function name here to get lots of debugging output. -static const char* debugThisFn = ""; +static const char* debugThisFn = "";//"deinit6"; // If there is a gap between memory that we are loading, @@ -97,19 +100,21 @@ static const char* debugThisFn = ""; #define GET_EXTRA 64 static inline -bool isGlobalLoadOrStore(Instruction* I, - unsigned globalSpace, - bool findLoad, bool findStore) +bool isMergeableGlobalLoadOrStore(Instruction* I, + unsigned globalSpace, + bool findLoad, bool findStore) { if( findLoad && isa(I) ) { LoadInst *load = cast(I); - if( load->getPointerAddressSpace() == globalSpace ) { + if( load->getPointerAddressSpace() == globalSpace && + load->isSimple() ) { return true; } } if( findStore && isa(I)) { StoreInst *store = cast(I); - if( store->getPointerAddressSpace() == globalSpace ) { + if( store->getPointerAddressSpace() == globalSpace && + store->isSimple() ) { return true; } } @@ -128,33 +133,6 @@ Value* getLoadStorePointer(Instruction* I) } return NULL; } -static -Value* rebasePointer(Value* ptr, Value* oldBase, Value* newBase, const Twine &name, - IRBuilder<>* builder, const DataLayout &TD, - Value* oldBaseI, Value* newBaseI) -{ - Type* iPtrTy = TD.getIntPtrType(ptr->getType()); - Type* localPtrTy = ptr->getType()->getPointerElementType()->getPointerTo(0); - - Value* ret; - - if( ptr != oldBase ) { - // compute newBase + (ptr - oldBase) 
- Value* pI = builder->CreatePtrToInt(ptr, iPtrTy, name + ".ptr.i"); - assert( oldBaseI ); - assert( newBaseI ); - // then subtract - Value* diff = builder->CreateSub(pI, oldBaseI, name + ".diff"); - // then make sure same type - Value* ext = builder->CreateSExtOrTrunc(diff, newBaseI->getType(), ".ext.i"); - // Now add - Value* sum = builder->CreateAdd(newBaseI, ext, name + ".sum"); - ret = builder->CreateIntToPtr(sum, localPtrTy, name + ".cast"); - } else { - ret = builder->CreatePointerCast(newBase, localPtrTy, name + ".cast"); - } - return ret; -} // Given a start and end load/store instruction (in the same basic block), // reorder the instructions so that the addressing instructions are @@ -223,11 +201,15 @@ Instruction* reorderAddressingMemopsUses(Instruction *FirstLoadOrStore, // Leave loads/stores where they are (they will be removed) if( isa(insn) || isa(insn) ) { if( DebugThis ) { - errs() << "found load/store: "; insn->dump(); + dbgs() << "found load/store: "; + insn->print(dbgs(), true); + dbgs() << '\n'; } } else if( memopsUses.count(insn) ) { if( DebugThis ) { - errs() << "found memop use: "; insn->dump(); + dbgs() << "found memop use: "; + insn->print(dbgs(), true); + dbgs() << '\n'; } // Move uses of memops to after the final memop. insn->removeFromParent(); @@ -235,7 +217,9 @@ Instruction* reorderAddressingMemopsUses(Instruction *FirstLoadOrStore, LastMemopUse = insn; } else { if( DebugThis ) { - errs() << "found other: "; insn->dump(); + dbgs() << "found other: "; + insn->print(dbgs(), true); + dbgs() << '\n'; } // Move addressing instructions to before the first memop. insn->removeFromParent(); @@ -250,8 +234,10 @@ Instruction* reorderAddressingMemopsUses(Instruction *FirstLoadOrStore, // The next several fns are stolen almost totally unmodified from MemCpyOptimizer. // modified code areas say CUSTOM. 
-static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, - bool &VariableIdxFound, const DataLayout &TD){ +static int64_t GetOffsetFromIndex(const GEPOperator *GEP, + unsigned Idx, + bool &VariableIdxFound, + const DataLayout &DL){ // Skip over the first indices. gep_type_iterator GTI = gep_type_begin(GEP); for (unsigned i = 1; i != Idx; ++i, ++GTI) @@ -261,7 +247,7 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, int64_t Offset = 0; for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { ConstantInt *OpC = dyn_cast(GEP->getOperand(i)); - if (OpC == 0) + if (!OpC) return VariableIdxFound = true; if (OpC->isZero()) continue; // No offset. @@ -272,13 +258,13 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, if (StructType *STy = dyn_cast(*GTI)) #endif { - Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); continue; } // Otherwise, we have a sequential type like an array or vector. Multiply // the index by the ElementSize. - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); + uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); Offset += Size*OpC->getSExtValue(); } @@ -288,9 +274,16 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, /// constant offset, and return that constant offset. For example, Ptr1 might /// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8. static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, - const DataLayout &TD) { + const DataLayout &DL) { Ptr1 = Ptr1->stripPointerCasts(); Ptr2 = Ptr2->stripPointerCasts(); + + // Handle the trivial case first. 
+ if (Ptr1 == Ptr2) { + Offset = 0; + return true; + } + GEPOperator *GEP1 = dyn_cast(Ptr1); GEPOperator *GEP2 = dyn_cast(Ptr2); @@ -298,13 +291,13 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, // If one pointer is a GEP and the other isn't, then see if the GEP is a // constant offset from the base, as in "P" and "gep P, 1". - if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { - Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD); + if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { + Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, DL); return !VariableIdxFound; } - if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { - Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD); + if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { + Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, DL); return !VariableIdxFound; } @@ -322,8 +315,8 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) break; - int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD); - int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD); + int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, DL); + int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, DL); if (VariableIdxFound) return false; Offset = Offset2-Offset1; @@ -351,8 +344,8 @@ struct MemOpRanges { // from MemsetRanges in MemCpyOptimizer /// because each element is relatively large and expensive to copy. 
std::list Ranges; typedef std::list::iterator range_iterator; - const DataLayout &TD; - MemOpRanges(const DataLayout &td) : TD(td) { } + const DataLayout &DL; + MemOpRanges(const DataLayout &td) : DL(td) { } typedef std::list::const_iterator const_iterator; const_iterator begin() const { return Ranges.begin(); } const_iterator end() const { return Ranges.end(); } @@ -376,7 +369,7 @@ struct MemOpRanges { // from MemsetRanges in MemCpyOptimizer } } void addStore(int64_t OffsetFromFirst, StoreInst *SI) { - int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType()); + int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType()); int64_t Slack = 0; // TODO - compute slack based on structure padding. // Make slack include padding if it is after this // element in a structure. @@ -387,7 +380,7 @@ struct MemOpRanges { // from MemsetRanges in MemCpyOptimizer // CUSTOM because MemsetRanges doesn't work with LoadInsts. void addLoad(int64_t OffsetFromFirst, LoadInst *LI) { Type* ptrType = LI->getOperand(0)->getType(); - int64_t LoadSize = TD.getTypeStoreSize(ptrType->getPointerElementType()); + int64_t LoadSize = DL.getTypeStoreSize(ptrType->getPointerElementType()); int64_t Slack = GET_EXTRA; // Pretend loads use more space... addRange(OffsetFromFirst, LoadSize, Slack, @@ -433,7 +426,7 @@ void MemOpRanges::addRange(int64_t Start, int64_t Size, int64_t Slack, Value *Pt // This store overlaps with I, add it. I->TheStores.push_back(Inst); - // Update End too. + // CUSTOM: Update End too. if (End > I->End) I->End = End; // At this point, we may have an interval that completely contains our store. @@ -475,18 +468,18 @@ void MemOpRanges::addRange(int64_t Start, int64_t Size, int64_t Slack, Value *Pt // END stolen from MemCpyOptimizer. 
struct AggregateGlobalOpsOpt : public FunctionPass { - const DataLayout *TD; + const DataLayout *DL; unsigned globalSpace; public: static char ID; // Pass identification, replacement for typeid AggregateGlobalOpsOpt() : FunctionPass(ID) { - TD = 0; + DL = 0; errs() << "Warning: aggregate-global-opts using default configuration\n"; globalSpace = 100; } AggregateGlobalOpsOpt(unsigned _globalSpace) : FunctionPass(ID) { - TD = 0; + DL = 0; globalSpace = _globalSpace; } @@ -525,27 +518,27 @@ FunctionPass *createAggregateGlobalOpsOptPass(unsigned globalSpace) /// removed some loads or stores and that might invalidate an iterator. Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value *StartPtr, bool DebugThis) { - if (TD == 0) return 0; + if (DL == 0) return 0; Module* M = StartInst->getParent()->getParent()->getParent(); LLVMContext& Context = StartInst->getContext(); Type* int8Ty = Type::getInt8Ty(Context); - Type* sizeTy = Type::getInt64Ty(Context); + Type* sizeTy = DL->getIntPtrType(Context, 0); Type* globalInt8PtrTy = int8Ty->getPointerTo(globalSpace); bool isLoad = isa(StartInst); bool isStore = isa(StartInst); Instruction *lastAddedInsn = NULL; Instruction *LastLoadOrStore = NULL; - + SmallVector toRemove; // Okay, so we now have a single global load/store. Scan to find // all subsequent stores of the same value to offset from the same pointer. // Join these together into ranges, so we can decide whether contiguous blocks // are stored. - MemOpRanges Ranges(*TD); - + MemOpRanges Ranges(*DL); + // Put the first store in since we want to preserve the order. Ranges.addInst(0, StartInst); @@ -558,7 +551,7 @@ Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value Instruction& insnRef = *BI; Instruction* insn = &insnRef; - if( isGlobalLoadOrStore(insn, globalSpace, isLoad, isStore) ) { + if( isMergeableGlobalLoadOrStore(insn, globalSpace, isLoad, isStore) ) { // OK! 
} else { // If the instruction is readnone, ignore it, otherwise bail out. We @@ -578,7 +571,7 @@ Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; - if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD)) + if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *DL)) break; Ranges.addStore(Offset, NextStore); @@ -589,7 +582,7 @@ Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value // Check to see if this load is to a constant offset from the start ptr. int64_t Offset; - if (!IsPointerOffset(StartPtr, NextLoad->getPointerOperand(), Offset, *TD)) + if (!IsPointerOffset(StartPtr, NextLoad->getPointerOperand(), Offset, *DL)) break; Ranges.addLoad(Offset, NextLoad); @@ -614,8 +607,6 @@ Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value for (MemOpRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MemOpRange &Range = *I; - Value* oldBaseI = NULL; - Value* newBaseI = NULL; if (Range.TheStores.size() == 1) continue; // Don't bother if there's only one thing... @@ -626,8 +617,9 @@ Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value StartPtr = Range.StartPtr; if( DebugThis ) { - errs() << "base is:"; - StartPtr->dump(); + dbgs() << "base is:"; + StartPtr->print(dbgs(), true); + dbgs() << '\n'; } // Determine alignment @@ -635,24 +627,17 @@ Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value if (Alignment == 0) { Type *EltType = cast(StartPtr->getType())->getElementType(); - Alignment = TD->getABITypeAlignment(EltType); + Alignment = DL->getABITypeAlignment(EltType); } Instruction *alloc = NULL; - Value *globalPtr = NULL; // create temporary alloca space to communicate to/from. 
alloc = makeAlloca(int8Ty, "agg.tmp", insertBefore, Range.End-Range.Start, Alignment); - // Generate the old and new base pointers before we output - // anything else. - { - Type* iPtrTy = TD->getIntPtrType(alloc->getType()); - Type* iNewBaseTy = TD->getIntPtrType(alloc->getType()); - oldBaseI = builder.CreatePtrToInt(StartPtr, iPtrTy, "agg.tmp.oldb.i"); - newBaseI = builder.CreatePtrToInt(alloc, iNewBaseTy, "agg.tmp.newb.i"); - } + // Cast the old base pointer to i8, but with the same address space. + //Value* StartPtrI8 = builder.CreatePointerCast(StartPtr, globalInt8PtrTy); // If storing, do the stores we had into our alloca'd region. if( isStore ) { @@ -662,29 +647,36 @@ Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value StoreInst* oldStore = cast(*SI); if( DebugThis ) { - errs() << "have store in range:"; - oldStore->dump(); + dbgs() << "have store in range:"; + oldStore->print(dbgs(), true); + dbgs() << '\n'; } - Value* ptrToAlloc = rebasePointer(oldStore->getPointerOperand(), - StartPtr, alloc, "agg.tmp", - &builder, *TD, oldBaseI, newBaseI); - // Old load must not be volatile or atomic... or we shouldn't have put - // it in ranges + int64_t offset = 0; + bool ok = IsPointerOffset(StartPtr, oldStore->getPointerOperand(), + offset, *DL); + assert(ok && offset >= 0); // we used this before, didn't we? 
assert(!(oldStore->isVolatile() || oldStore->isAtomic())); + + Constant* offsetC = ConstantInt::get(sizeTy, offset, true); + Value* offsets[] = {offsetC}; + Value* i8Dst = builder.CreateInBoundsGEP(int8Ty, + alloc, + offsets); + + Type* origDstTy = oldStore->getPointerOperand()->getType(); + Type* DstTy = origDstTy->getPointerElementType()->getPointerTo(0); + Value* Dst = builder.CreatePointerCast(i8Dst, DstTy); + StoreInst* newStore = - builder.CreateStore(oldStore->getValueOperand(), ptrToAlloc); + builder.CreateStore(oldStore->getValueOperand(), Dst); newStore->setAlignment(oldStore->getAlignment()); newStore->takeName(oldStore); } } // cast the pointer that was load/stored to i8 if necessary. - if( StartPtr->getType()->getPointerElementType() == int8Ty ) { - globalPtr = StartPtr; - } else { - globalPtr = builder.CreatePointerCast(StartPtr, globalInt8PtrTy, "agg.cast"); - } + Value *globalPtr = builder.CreatePointerCast(StartPtr, globalInt8PtrTy); // Get a Constant* for the length. Constant* len = ConstantInt::get(sizeTy, Range.End-Range.Start, false); @@ -739,17 +731,27 @@ Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value SE = Range.TheStores.end(); SI != SE; ++SI) { LoadInst* oldLoad = cast(*SI); if( DebugThis ) { - errs() << "have load in range:"; - oldLoad->dump(); + dbgs() << "have load in range:"; + oldLoad->print(dbgs(), true); + dbgs() << '\n'; } - Value* ptrToAlloc = rebasePointer(oldLoad->getPointerOperand(), - StartPtr, alloc, "agg.tmp", - &builder, *TD, oldBaseI, newBaseI); - // Old load must not be volatile or atomic... or we shouldn't have put - // it in ranges + int64_t offset = 0; + bool ok = IsPointerOffset(StartPtr, oldLoad->getPointerOperand(), + offset, *DL); + assert(ok && offset >= 0); // we used this before, didn't we? 
assert(!(oldLoad->isVolatile() || oldLoad->isAtomic())); - LoadInst* newLoad = builder.CreateLoad(ptrToAlloc); + + Constant* offsetC = ConstantInt::get(sizeTy, offset, true); + Value* offsets[] = {offsetC}; + Value* i8Src = builder.CreateInBoundsGEP(int8Ty, + alloc, + offsets); + Type* origSrcTy = oldLoad->getPointerOperand()->getType(); + Type* SrcTy = origSrcTy->getPointerElementType()->getPointerTo(0); + Value* Src = builder.CreatePointerCast(i8Src, SrcTy); + + LoadInst* newLoad = builder.CreateLoad(Src); newLoad->setAlignment(oldLoad->getAlignment()); oldLoad->replaceAllUsesWith(newLoad); newLoad->takeName(oldLoad); @@ -776,32 +778,54 @@ Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value return lastAddedInsn; } -// MemCpyOpt::runOnFunction - This is the main transformation entry point for a -// function. +// AggregateGlobalOpsOpt::runOnFunction - This is the main transformation +// entry point for a function. // bool AggregateGlobalOpsOpt::runOnFunction(Function &F) { - bool MadeChange = false; + bool ChangedFn = false; bool DebugThis = DEBUG; - + +/* std::string fname = F.getName(); + std::hash hasher; + int h = (int) hasher(fname); + int mask = AGOMASK; + int id = AGOID; + if( (h & mask) != id) return false; + + if( fname.size() != 7 ) return false; + + if( F.getName().startswith("on_fn") ) return false; + + if (fname == "string2" || fname == "message") return false; + + if (fname == "deinit9") return false; // OK + if (fname == "deinit5") return false; // OK + //if (fname == "deinit6") return false; + //if( F.getName().startswith("deinit") ) return false; +*/ if( debugThisFn[0] && F.getName() == debugThisFn ) { DebugThis = true; } //MD = &getAnalysis(); #if HAVE_LLVM_VER >= 37 - TD = & F.getParent()->getDataLayout(); + DL = & F.getParent()->getDataLayout(); #elif HAVE_LLVM_VER >= 35 - TD = & getAnalysisIfAvailable()->getDataLayout(); + DL = & getAnalysisIfAvailable()->getDataLayout(); #else - TD = getAnalysisIfAvailable(); + DL 
= getAnalysisIfAvailable(); #endif //TLI = &getAnalysis(); // Walk all instruction in the function. for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { + + bool ChangedBB = false; + if( DebugThis ) { - errs() << "Working on BB "; - BB->dump(); + dbgs() << "Working on BB "; + BB->print(dbgs(), true); + dbgs() << '\n'; } for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { @@ -810,10 +834,11 @@ bool AggregateGlobalOpsOpt::runOnFunction(Function &F) { Instruction *I = &insnRef; ++BI; - if( isGlobalLoadOrStore(I, globalSpace, true, true) ) { + if( isMergeableGlobalLoadOrStore(I, globalSpace, true, true) ) { Instruction* lastAdded = tryAggregating(I, getLoadStorePointer(I), DebugThis); if( lastAdded ) { - MadeChange = true; + ChangedBB = true; + ChangedFn = true; #if HAVE_LLVM_VER >= 38 BI = lastAdded->getIterator(); #else @@ -823,9 +848,11 @@ bool AggregateGlobalOpsOpt::runOnFunction(Function &F) { } } - if( DebugThis && MadeChange ) { - errs() << "After transform BB is "; - BB->dump(); + if( DebugThis && ChangedBB ) { + dbgs() << "in function " << F.getName() << "\n"; + dbgs() << "After transform BB is "; + BB->print(dbgs(), true); + dbgs() << '\n'; } } @@ -838,8 +865,11 @@ bool AggregateGlobalOpsOpt::runOnFunction(Function &F) { #endif } + if (DebugThis && ChangedFn) + printf("AggregateGlobalOpsOpt changed %s\n", F.getName().str().c_str()); + //MD = 0; - return MadeChange; + return ChangedFn; } diff --git a/compiler/util/llvmDebug.cpp b/compiler/util/llvmDebug.cpp index ee4746737669..2ae1d3e688a2 100644 --- a/compiler/util/llvmDebug.cpp +++ b/compiler/util/llvmDebug.cpp @@ -131,7 +131,7 @@ LLVM_DITYPE debug_data::construct_type(Type *type) if(N) return toDITYPE(N); GenInfo* info = gGenInfo; - LLVM_TARGET_DATA *layout = info->targetData; + const llvm::DataLayout& layout = info->module->getDataLayout(); llvm::Type* ty = type->symbol->llvmType; const char* name = type->symbol->name; @@ -157,9 +157,9 @@ LLVM_DITYPE 
debug_data::construct_type(Type *type) if(ty->isIntegerTy()) { N = this->dibuilder.createBasicType( name, /* Name */ - layout->getTypeSizeInBits(ty), /* SizeInBits */ + layout.getTypeSizeInBits(ty), /* SizeInBits */ #if HAVE_LLVM_VER <= 39 - 8*layout->getABITypeAlignment(ty), /* AlignInBits */ + 8*layout.getABITypeAlignment(ty), /* AlignInBits */ #endif (is_signed(type))? (llvm::dwarf::DW_ATE_signed): @@ -172,9 +172,9 @@ LLVM_DITYPE debug_data::construct_type(Type *type) else if(ty->isFloatingPointTy()) { N = this->dibuilder.createBasicType( name, - layout->getTypeSizeInBits(ty), + layout.getTypeSizeInBits(ty), #if HAVE_LLVM_VER <= 39 - 8*layout->getABITypeAlignment(ty), + 8*layout.getABITypeAlignment(ty), #endif llvm::dwarf::DW_ATE_float); @@ -186,7 +186,7 @@ LLVM_DITYPE debug_data::construct_type(Type *type) if(type != type->getValType()) {//Add this condition to avoid segFault N = this->dibuilder.createPointerType( get_type(type->getValType()),//it should return the pointee's DIType - layout->getPointerSizeInBits(ty->getPointerAddressSpace()), + layout.getPointerSizeInBits(ty->getPointerAddressSpace()), 0, /* alignment */ #if HAVE_LLVM_VER >= 50 llvm::None, @@ -204,15 +204,15 @@ LLVM_DITYPE debug_data::construct_type(Type *type) LLVM_DITYPE pteIntDIType; //create the DI-pointeeType pteIntDIType = this->dibuilder.createBasicType( myGetTypeName(PointeeTy), - layout->getTypeSizeInBits(PointeeTy), + layout.getTypeSizeInBits(PointeeTy), #if HAVE_LLVM_VER <= 39 - 8*layout->getABITypeAlignment(PointeeTy), + 8*layout.getABITypeAlignment(PointeeTy), #endif llvm::dwarf::DW_ATE_unsigned); N = this->dibuilder.createPointerType( pteIntDIType, - layout->getPointerSizeInBits(ty->getPointerAddressSpace()), + layout.getPointerSizeInBits(ty->getPointerAddressSpace()), 0, #if HAVE_LLVM_VER >= 50 llvm::None, @@ -231,10 +231,10 @@ LLVM_DITYPE debug_data::construct_type(Type *type) get_file(defFile), /* File */ 0, /* LineNumber */ (PointeeTy->isSized()? 
- layout->getTypeSizeInBits(PointeeTy): + layout.getTypeSizeInBits(PointeeTy): 8), /* SizeInBits */ (PointeeTy->isSized()? - 8*layout->getABITypeAlignment(PointeeTy): + 8*layout.getABITypeAlignment(PointeeTy): 8), /* AlignInBits */ FLAG_ZERO, /* Flags */ toDITYPE(NULL), /* DerivedFrom */ @@ -247,7 +247,7 @@ LLVM_DITYPE debug_data::construct_type(Type *type) N = this->dibuilder.createPointerType( pteStrDIType, - layout->getPointerSizeInBits(ty->getPointerAddressSpace()), + layout.getPointerSizeInBits(ty->getPointerAddressSpace()), 0, #if HAVE_LLVM_VER >= 50 llvm::None, @@ -272,7 +272,7 @@ LLVM_DITYPE debug_data::construct_type(Type *type) if(vt) { N = this->dibuilder.createPointerType( get_type(vt), - layout->getPointerSizeInBits(ty->getPointerAddressSpace()), + layout.getPointerSizeInBits(ty->getPointerAddressSpace()), 0, #if HAVE_LLVM_VER >= 50 llvm::None, @@ -297,15 +297,15 @@ LLVM_DITYPE debug_data::construct_type(Type *type) get_file(defFile), defLine, 0, // RuntimeLang - layout->getTypeSizeInBits(ty), - 8*layout->getABITypeAlignment(ty)); + layout.getTypeSizeInBits(ty), + 8*layout.getABITypeAlignment(ty)); //N is added to the map (early) so that element search below can find it, //so as to avoid infinite recursion for structs that contain pointers to //their own type. 
myTypeDescriptors[type] = N; - slayout = layout->getStructLayout(struct_type); + slayout = layout.getStructLayout(struct_type); for_fields(field, this_class) { // field is a Symbol const char* fieldDefFile = field->defPoint->fname(); @@ -326,8 +326,8 @@ LLVM_DITYPE debug_data::construct_type(Type *type) field->name, get_file(fieldDefFile), fieldDefLine, - layout->getTypeSizeInBits(fty), - 8*layout->getABITypeAlignment(fty), + layout.getTypeSizeInBits(fty), + 8*layout.getABITypeAlignment(fty), slayout->getElementOffsetInBits(this_class->getMemberGEP(field->cname)), FLAG_ZERO, fditype); @@ -341,8 +341,8 @@ LLVM_DITYPE debug_data::construct_type(Type *type) name, /* Name */ get_file(defFile), /* File */ defLine, /* LineNumber */ - layout->getTypeSizeInBits(ty), /* SizeInBits */ - 8*layout->getABITypeAlignment(ty), /* AlignInBits */ + layout.getTypeSizeInBits(ty), /* SizeInBits */ + 8*layout.getABITypeAlignment(ty), /* AlignInBits */ FLAG_ZERO, /* Flags */ derivedFrom, /* DerivedFrom */ this->dibuilder.getOrCreateArray(EltTys) /* Elements */ @@ -364,7 +364,7 @@ LLVM_DITYPE debug_data::construct_type(Type *type) const llvm::StructLayout* slayout = NULL; llvm::StructType* struct_type = llvm::cast(ty); - slayout = layout->getStructLayout(struct_type); + slayout = layout.getStructLayout(struct_type); N = this->dibuilder.createForwardDecl( llvm::dwarf::DW_TAG_structure_type, @@ -373,8 +373,8 @@ LLVM_DITYPE debug_data::construct_type(Type *type) get_file(defFile), defLine, 0, // RuntimeLang - layout->getTypeSizeInBits(ty), - 8*layout->getABITypeAlignment(ty)); + layout.getTypeSizeInBits(ty), + 8*layout.getABITypeAlignment(ty)); //N is added to the map (early) so that element search below can find it, //so as to avoid infinite recursion for structs that contain pointers to @@ -401,8 +401,8 @@ LLVM_DITYPE debug_data::construct_type(Type *type) field->name, get_file(fieldDefFile), fieldDefLine, - layout->getTypeSizeInBits(fty), - 8*layout->getABITypeAlignment(fty), + 
layout.getTypeSizeInBits(fty), + 8*layout.getABITypeAlignment(fty), slayout->getElementOffsetInBits(this_class->getMemberGEP(field->cname)), FLAG_ZERO, fditype); @@ -416,8 +416,8 @@ LLVM_DITYPE debug_data::construct_type(Type *type) name, get_file(defFile), defLine, - layout->getTypeSizeInBits(ty), - 8*layout->getABITypeAlignment(ty), + layout.getTypeSizeInBits(ty), + 8*layout.getABITypeAlignment(ty), FLAG_ZERO, derivedFrom, this->dibuilder.getOrCreateArray(EltTys)); @@ -431,8 +431,8 @@ LLVM_DITYPE debug_data::construct_type(Type *type) name, get_file(defFile), defLine, - layout->getTypeSizeInBits(ty), - 8*layout->getABITypeAlignment(ty), + layout.getTypeSizeInBits(ty), + 8*layout.getABITypeAlignment(ty), FLAG_ZERO, derivedFrom, this->dibuilder.getOrCreateArray(EltTys)); @@ -446,8 +446,8 @@ LLVM_DITYPE debug_data::construct_type(Type *type) name, get_file(defFile), defLine, - layout->getTypeSizeInBits(ty), - 8*layout->getABITypeAlignment(ty), + layout.getTypeSizeInBits(ty), + 8*layout.getABITypeAlignment(ty), FLAG_ZERO, this->dibuilder.getOrCreateArray(EltTys)); @@ -467,7 +467,7 @@ LLVM_DITYPE debug_data::construct_type(Type *type) Type *eleType = eleSym->type; N = this->dibuilder.createArrayType( Asize, - 8*layout->getABITypeAlignment(ty), + 8*layout.getABITypeAlignment(ty), get_type(eleType), this->dibuilder.getOrCreateArray(Subscripts)); diff --git a/compiler/util/llvmGlobalToWide.cpp b/compiler/util/llvmGlobalToWide.cpp index beff04d06085..701ca8ac6aa8 100644 --- a/compiler/util/llvmGlobalToWide.cpp +++ b/compiler/util/llvmGlobalToWide.cpp @@ -1,15 +1,15 @@ /* * Copyright 2004-2017 Cray Inc. * Other additional copyright holders may be indicated within. - * + * * The entirety of this work is licensed under the Apache License, * Version 2.0 (the "License"); you may not use this file except * in compliance with the License. 
- * + * * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -18,11 +18,11 @@ */ //===----------------------------------------------------------------------===// -// Chapel LLVM Wide Opt +// Chapel LLVM Wide Opt //===----------------------------------------------------------------------===// // When --llvm-wide-opt is invoked, the code generator generates global // pointers - ie, those with a special address space - instead of wide -// pointer structures. +// pointer structures. // // Then, LLVM optimizations are run on this bitcode if you supply --fast. // This pass then is run to lower the operations on global pointers to @@ -43,10 +43,12 @@ #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #if HAVE_LLVM_VER >= 35 +#include "llvm/IR/Attributes.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Verifier.h" @@ -65,11 +67,22 @@ using namespace llvm; namespace { + // Here is some information about the layout of wide pointers + // This code assumes it is + // locale-id + // node + // ... // addr + + // Besides these GEPs, createWidePointerToType relies on the representation. + unsigned wideLocaleGEP[] = {0}; + unsigned wideNodeGEP[] = {0,0}; + unsigned wideAddrGEP[] = {1}; + static const bool debugAllPassOne = false; static const bool debugAllPassTwo = false; - static const bool extraChecks = false; + static const bool extraChecks = true; // Set a function name here to get lots of debugging output. 
- static const char* debugThisFn = ""; + static const char* debugThisFn = "";//"deinit6"; AllocaInst* makeAlloca(llvm::Type* type, const char* name, @@ -120,17 +133,17 @@ namespace { assert(New->getParent()); assert(New && "Value::replaceAllUsesWith() is invalid!"); assert(New != Old && "this->replaceAllUsesWith(this) is NOT valid!"); - + // Notify all ValueHandles (if present) that this value is going away. if (Old->hasValueHandle()) ValueHandleBase::ValueIsRAUWd(Old, New); - + while (!Old->use_empty()) { User* U = Old->use_back(); U->replaceUsesOfWith(Old, New); } - + Old->eraseFromParent(); } #endif @@ -180,38 +193,110 @@ namespace { return ConstantInt::get(ty, val); } + Instruction* createRaddr(GlobalToWideInfo* info, Value* widePtr, Instruction* insertBefore) { + // Assuming widePtr is a struct wide-pointer, extract the address + assert(widePtr->getType()->isStructTy()); + + return ExtractValueInst::Create(widePtr, + wideAddrGEP, + "", insertBefore); + } + Instruction* createRlocale(GlobalToWideInfo* info, Value* widePtr, Instruction* insertBefore) { + // Assuming widePtr is a struct wide-pointer, extract the locale-id + assert(widePtr->getType()->isStructTy()); + + return ExtractValueInst::Create(widePtr, + wideLocaleGEP, + "", insertBefore); + } + Instruction* createRnode(GlobalToWideInfo* info, Value* widePtr, Instruction* insertBefore) { + // Assuming widePtr is a struct wide-pointer, extract the node + assert(widePtr->getType()->isStructTy()); + + return ExtractValueInst::Create(widePtr, + wideNodeGEP, + "", insertBefore); + } + Instruction* createWideAddr(GlobalToWideInfo* info, + Value* localeId, Value* addr, + Type* widePtrType, + Instruction* insertBefore) { + + Constant* undefWidePtr = UndefValue::get(widePtrType); + + Instruction* locSet = InsertValueInst::Create(undefWidePtr, localeId, + wideLocaleGEP, + "", insertBefore); + + Instruction* ptrSet = InsertValueInst::Create(locSet, addr, + wideAddrGEP, + "", insertBefore); + return ptrSet; + } + 
Value* createWideBitCast(GlobalToWideInfo* info, Value* widePtr, Type* widePtrType, Instruction* insertBefore) { - /* a sketch of how it would work with structs: - * // The destination type should be a wide pointer. assert(widePtrType->isStructTy()); + if( widePtr->getType() == widePtrType ) return widePtr; + Value* loc = ExtractValueInst::Create(widePtr, - info->wideLocGEP, + wideLocaleGEP, "", insertBefore); Value* ptr = ExtractValueInst::Create(widePtr, - info->wideAddrGEP, + wideAddrGEP, "", insertBefore); Constant* undef = UndefValue::get(widePtrType); Constant* undefLocPtr = ConstantExpr::getExtractValue(undef, - info->wideAddrGEP); + wideAddrGEP); // get the local address space pointer. - Value* cast = CastInst::CreatePointerCast(ptr, undefLocPtr->getType(), "", insertBefore); + Value* cast = CastInst::CreatePointerCast(ptr, undefLocPtr->getType(), + "", insertBefore); Instruction* locSet = InsertValueInst::Create(undef, loc, - info->wideLocGEP, + wideLocaleGEP, "", insertBefore); Instruction* ptrSet = InsertValueInst::Create(locSet, cast, - info->wideAddrGEP, + wideAddrGEP, "", insertBefore); return ptrSet; - */ - if( widePtr->getType() == widePtrType ) return widePtr; - else return CastInst::CreatePointerCast(widePtr, widePtrType, - "", insertBefore); + } + + // Creates a store/load pattern with bitcasts to implement complex + // type conversions (converting a wide pointer type to an integer type, say). 
+ Instruction* createStoreLoadCast(Value* fromValue, + Type* toType, + Instruction* insertBefore) { + + Function *inFunc = insertBefore->getParent()->getParent(); + const DataLayout &DL = inFunc->getParent()->getDataLayout(); + + Type* allocType = toType; + if (DL.getTypeStoreSize(fromValue->getType()) > + DL.getTypeStoreSize(toType)) + allocType = fromValue->getType(); + + Value* alloc = makeAlloca(allocType, "widecast", insertBefore); + + Type* fromPtrType = fromValue->getType()->getPointerTo(); + Type* newPtrType = toType->getPointerTo(); + + Value* allocAsFrom = alloc; + if (allocAsFrom->getType() != fromPtrType) + allocAsFrom = CastInst::CreatePointerCast(alloc, fromPtrType, + "", insertBefore); + Value* allocAsNew = alloc; + if (allocAsNew->getType() != newPtrType) + allocAsNew = CastInst::CreatePointerCast(alloc, newPtrType, + "", insertBefore); + + new StoreInst(fromValue, allocAsFrom, insertBefore); + Instruction* load = new LoadInst(allocAsNew, "", insertBefore); + + return load; } void checkFunctionExistAndHasArgs(Constant* f, unsigned nArgs) @@ -252,6 +337,7 @@ namespace { GlobalToWideInfo * info; bool debugPassTwo; // these are here to save some typing. 
+ llvm::Type* nodeTy; llvm::Type* voidTy; llvm::Type* voidPtrTy; llvm::Type* glVoidPtrTy; @@ -259,12 +345,9 @@ namespace { llvm::Type* ptrLocTy; llvm::Type* i64Ty; llvm::Type* i8Ty; + llvm::Type* sizeTy; // See llvmGlobalToWide.h for descriptions of these functions - llvm::Constant* addrFn; - llvm::Constant* locFn; - llvm::Constant* nodeFn; - llvm::Constant* makeFn; llvm::Constant* getFn; llvm::Constant* putFn; llvm::Constant* getPutFn; @@ -282,6 +365,7 @@ namespace { assert(info->localeIdType != 0); assert(info->nodeIdType != 0); + nodeTy = info->nodeIdType; voidTy = llvm::Type::getVoidTy(M.getContext()); voidPtrTy = llvm::Type::getInt8PtrTy(M.getContext(), 0); glVoidPtrTy = llvm::Type::getInt8PtrTy(M.getContext(), @@ -291,99 +375,60 @@ namespace { i64Ty = llvm::Type::getInt64Ty(M.getContext()); i8Ty = llvm::Type::getInt8Ty(M.getContext()); + const DataLayout& DL = M.getDataLayout(); + sizeTy = DL.getIntPtrType(M.getContext(), 0); + assert(voidPtrTy); assert(wideVoidPtrTy); - addrFn = info->addrFn; - if( ! addrFn ) { - addrFn = M.getOrInsertFunction("chpl_wide_ptr_get_address_sym", - voidPtrTy, wideVoidPtrTy -#if HAVE_LLVM_VER < 50 - , NULL -#endif - ); - } - checkFunctionExistAndHasArgs(addrFn, 1); - - locFn = info->locFn; - if( ! locFn ) { - locFn = M.getOrInsertFunction("chpl_wide_ptr_read_localeID_sym", - voidTy, wideVoidPtrTy, ptrLocTy -#if HAVE_LLVM_VER < 50 - , NULL -#endif - ); - } - checkFunctionExistAndHasArgs(locFn, 2); - - nodeFn = info->nodeFn; - if( ! nodeFn ) { - nodeFn = M.getOrInsertFunction("chpl_wide_ptr_get_node_sym", - info->nodeIdType, wideVoidPtrTy -#if HAVE_LLVM_VER < 50 - , NULL -#endif - ); - } - checkFunctionExistAndHasArgs(nodeFn, 1); - - makeFn = info->makeFn; - if( ! makeFn ) { - makeFn = M.getOrInsertFunction("chpl_return_wide_ptr_loc_sym", - wideVoidPtrTy, ptrLocTy, voidPtrTy -#if HAVE_LLVM_VER < 50 - , NULL -#endif - ); - } - checkFunctionExistAndHasArgs(makeFn, 2); - getFn = info->getFn; if( ! 
getFn ) { getFn = M.getOrInsertFunction("chpl_gen_comm_get_ctl_sym", voidTy, - voidPtrTy, wideVoidPtrTy, - i64Ty, i64Ty + voidPtrTy, nodeTy, voidPtrTy, + sizeTy, i64Ty #if HAVE_LLVM_VER < 50 , NULL #endif ); } - checkFunctionExistAndHasArgs(getFn, 4); + checkFunctionExistAndHasArgs(getFn, 5); putFn = info->putFn; if( ! putFn ) { putFn = M.getOrInsertFunction("chpl_gen_comm_put_ctl_sym", voidTy, - wideVoidPtrTy, voidPtrTy, - i64Ty, i64Ty + nodeTy, voidPtrTy, voidPtrTy, + sizeTy, i64Ty #if HAVE_LLVM_VER < 50 , NULL #endif ); } - checkFunctionExistAndHasArgs(putFn, 4); + checkFunctionExistAndHasArgs(putFn, 5); getPutFn = info->getPutFn; if( ! getPutFn ) { getPutFn = M.getOrInsertFunction("chpl_gen_comm_getput_sym", voidTy, - wideVoidPtrTy, wideVoidPtrTy, - i64Ty + nodeTy, voidPtrTy, + nodeTy, voidPtrTy, + sizeTy #if HAVE_LLVM_VER < 50 , NULL #endif ); } - checkFunctionExistAndHasArgs(getPutFn, 3); + checkFunctionExistAndHasArgs(getPutFn, 5); memsetFn = info->memsetFn; if( ! memsetFn ) { memsetFn = M.getOrInsertFunction("chpl_gen_comm_memset_sym", voidTy, - wideVoidPtrTy, i8Ty, i64Ty + nodeTy, voidPtrTy, + i8Ty, sizeTy #if HAVE_LLVM_VER < 50 , NULL #endif ); } - checkFunctionExistAndHasArgs(memsetFn, 3); + checkFunctionExistAndHasArgs(memsetFn, 4); } Function* getGlobalToWideFn(Type* globalPtrTy) { @@ -428,7 +473,7 @@ namespace { Function* calledFn = call->getCalledFunction(); // null if indirect // handle wide2global, global2wide - if ( calledFn && info->specialFunctions.count(calledFn) ) { + if ( calledFn && calledFn->getName().startswith(GLOBAL_FN) ) { // Distinguish among the various special functions by the // function signature. 
if( calledFn->getName().startswith(GLOBAL_FN_WIDE_TO_GLOBAL) || @@ -454,7 +499,7 @@ namespace { void fixInstruction(Instruction* insn) { if( debugPassTwo ) { - errs() << " atf|" << *insn << "|" << "\n"; + dbgs() << " atf|" << *insn << "|" << "\n"; } // First, check to see if the instruction operates on @@ -473,7 +518,7 @@ namespace { if( isaGlobalPointer(info, insn->getType()) ) needsWork = true; if( ! needsWork ) { - if( debugPassTwo ) errs() << " okf|" << *insn << "|" << "\n"; + if( debugPassTwo ) dbgs() << " okf|" << *insn << "|" << "\n"; return; } @@ -520,13 +565,13 @@ namespace { // First, workaround a problem introduced by LLVM // optimization. - + Type* srcTy = oldCast->getSrcTy(); Type* dstTy = oldCast->getDestTy(); - bool fromOk = srcTy->isPointerTy() || + bool fromOk = srcTy->isPointerTy() || srcTy->isFunctionTy(); - bool toOK = dstTy->isPointerTy() || + bool toOK = dstTy->isPointerTy() || dstTy->isFunctionTy(); bool fromGlobal = srcTy->isPointerTy() && @@ -575,6 +620,54 @@ namespace { myReplaceInstWithInst(oldCast, w2g); break; } + case Instruction::GetElementPtr: { + GetElementPtrInst* oldGEP = cast(insn); + + if( oldGEP->getPointerAddressSpace() == info->globalSpace ) { + // since GEP doesn't work with a wide pointer argument, + // we have to translate it to a GEP on the local portion, then + // re-construct the wide address. 
+ + + // %w = g2w(thing) + // w2g( make( node(%w), getelementptr addr(%w) ...)) + + + Value* oldPtr = oldGEP->getPointerOperand(); + Type* oldPointeeType = oldGEP->getSourceElementType(); + Type* oldResultType = oldGEP->getType(); + SmallVector inds(oldGEP->idx_begin(), oldGEP->idx_end()); + + Type* newPointeeType = convertTypeGlobalToWide(&M, info, + oldPointeeType); + Type* newResultType = convertTypeGlobalToWide(&M, info, + oldResultType); + + Value* w = callGlobalToWideFn(oldPtr, oldGEP); + Value* loc = createRlocale(info, w, oldGEP); + Value* raddr = createRaddr(info, w, oldGEP); + + GetElementPtrInst* newGEP = GetElementPtrInst::Create( + newPointeeType, + raddr, + inds, + oldGEP->getName(), + oldGEP); + + newGEP->setIsInBounds(oldGEP->isInBounds()); + + Instruction* outWidePtr = createWideAddr(info, loc, newGEP, + newResultType, oldGEP); + Instruction* out = outWidePtr; + + if( out->getType() != oldResultType ) { + out = callWideToGlobalFn(out, oldResultType, oldGEP); + } + + myReplaceInstWithInst(oldGEP, out); + + } + break; } case Instruction::Load: { LoadInst *oldLoad = cast(insn); if( oldLoad->getPointerAddressSpace() == info->globalSpace ) { @@ -588,15 +681,24 @@ namespace { Type* wLoadedTy = convertTypeGlobalToWide(&M, info, glLoadedTy); // Create a call to 'get' // first, alloca a temporary to 'get' into - Value* alloc = makeAlloca(wLoadedTy, "", oldLoad); + Value* alloc = makeAlloca(wLoadedTy, "", oldLoad); Value* castAlloc = new BitCastInst(alloc, voidPtrTy, "", oldLoad); - Value* args[4]; + Value* node = createRnode(info, wAddr, oldLoad); + Value* raddr = createRaddr(info, wAddr, oldLoad); + Value* castRaddr = new BitCastInst(raddr, voidPtrTy, "", oldLoad); + Value* size = createSizeof(info, wLoadedTy); + { + // Convert size if necessary + IRBuilder<> builder(oldLoad); + size = builder.CreateZExtOrTrunc(size, sizeTy); + } + + Value* args[5]; args[0] = castAlloc; - args[1] = createWideBitCast(info, wAddr, - wideVoidPtrTy, - oldLoad); - args[2] = 
createSizeof(info, wLoadedTy); - args[3] = createLoadStoreControl(M, info, oldLoad->getOrdering(), + args[1] = node; + args[2] = castRaddr; + args[3] = size; + args[4] = createLoadStoreControl(M, info, oldLoad->getOrdering(), #if HAVE_LLVM_VER >= 50 oldLoad->getSyncScopeID() #else @@ -640,7 +742,7 @@ namespace { Type* wStoredTy = convertTypeGlobalToWide(&M, info, glValueTy); Value* wValueOp = callGlobalToWideFn(glValueOp, oldStore); - Value* wPtrOp = callGlobalToWideFn(glAddrOp, oldStore); + Value* wAddr = callGlobalToWideFn(glAddrOp, oldStore); // Create a call to 'put' // first, alloca a temporary to 'put' from @@ -660,14 +762,23 @@ namespace { oldStore); assert(st); + Value* node = createRnode(info, wAddr, oldStore); + Value* raddr = createRaddr(info, wAddr, oldStore); + Value* castRaddr = new BitCastInst(raddr, voidPtrTy, "", oldStore); + Value* size = createSizeof(info, wStoredTy); + { + // Convert size if necessary + IRBuilder<> builder(oldStore); + size = builder.CreateZExtOrTrunc(size, sizeTy); + } + // Now put from the alloc'd area - Value* args[4]; - args[0] = createWideBitCast(info, wPtrOp, - wideVoidPtrTy, - oldStore); - args[1] = castAlloc; - args[2] = createSizeof(info, wStoredTy); - args[3] = createLoadStoreControl(M, info, oldStore->getOrdering(), + Value* args[5]; + args[0] = node; + args[1] = castRaddr; + args[2] = castAlloc; + args[3] = size; + args[4] = createLoadStoreControl(M, info, oldStore->getOrdering(), #if HAVE_LLVM_VER >= 50 oldStore->getSyncScopeID() #else @@ -679,12 +790,42 @@ namespace { myReplaceInstWithInst(oldStore, put); } break; } + case Instruction::PtrToInt: { + PtrToIntInst *ptrToInt = cast(insn); + if( ptrToInt->getPointerAddressSpace() == info->globalSpace ) { + Value* ptr = ptrToInt->getPointerOperand(); + const DataLayout& DL = M.getDataLayout(); + + assert(DL.getTypeSizeInBits(ptrToInt->getType()) == + DL.getTypeSizeInBits(ptr->getType())); + + Instruction* conv = createStoreLoadCast(ptr, + ptrToInt->getType(), + 
ptrToInt); + myReplaceInstWithInst(ptrToInt, conv); + } + break; } + case Instruction::IntToPtr: { + IntToPtrInst *intToPtr = cast(insn); + if( intToPtr->getAddressSpace() == info->globalSpace ) { + Value* i = intToPtr->getOperand(0); + const DataLayout& DL = M.getDataLayout(); + + assert(DL.getTypeSizeInBits(intToPtr->getType()) == + DL.getTypeSizeInBits(i->getType())); + + Instruction* conv = createStoreLoadCast(i, + intToPtr->getType(), + intToPtr); + myReplaceInstWithInst(intToPtr, conv); + } + break; } case Instruction::Call: { // handle e.g. wide2global, global2wide, memcpy CallInst *call = cast(insn); Function* calledFn = call->getCalledFunction(); // null if indirect - if ( calledFn && info->specialFunctions.count(calledFn) ) { + if ( calledFn && calledFn->getName().startswith(GLOBAL_FN)) { // Distinguish among the various special functions by name. if( calledFn->getName().startswith(GLOBAL_FN_WIDE_TO_GLOBAL)) { @@ -705,11 +846,7 @@ namespace { Type* wLocAddrTy = convertTypeGlobalToWide(&M, info, glLocAddrTy); Value* wAddr = callGlobalToWideFn(glAddr, call); - Value* args[1]; - args[0] = createWideBitCast(info, wAddr, - wideVoidPtrTy, - call); - Instruction* extr = CallInst::Create(addrFn, args, "", call); + Instruction* extr = createRaddr(info, wAddr, call); if( extr->getType() != calledFn->getReturnType() ) { extr = CastInst::CreatePointerCast(extr, wLocAddrTy, "", call); } @@ -728,14 +865,8 @@ namespace { Value* wAddr = callGlobalToWideFn(glAddr, call); - Value* localePtr = makeAlloca(info->localeIdType, "", call); - Value* args[2]; - args[0] = createWideBitCast(info, wAddr, - wideVoidPtrTy, - call); - args[1] = localePtr; - CallInst::Create(locFn, args, "", call); - Instruction* loc = new LoadInst(localePtr, "", call); + Instruction* loc = createRlocale(info, wAddr, call); + assert(!loc->getType()->isPointerTy()); myReplaceInstWithInst(call, loc); } else if( calledFn->getName().startswith(GLOBAL_FN_GLOBAL_NODEID)){ @@ -746,11 +877,7 @@ namespace { 
Value* wAddr = callGlobalToWideFn(glAddr, call); - Value* args[1]; - args[0] = createWideBitCast(info, wAddr, - wideVoidPtrTy, - call); - Instruction* node = CallInst::Create(nodeFn, args, "", call); + Instruction* node = createRnode(info, wAddr, call); assert(!node->getType()->isPointerTy()); myReplaceInstWithInst(call, node); } else if( calledFn->getName().startswith(GLOBAL_FN_GLOBAL_MAKE) ){ @@ -764,26 +891,9 @@ namespace { Type* gResType = calledFn->getReturnType(); Type* wResType = convertTypeGlobalToWide(&M, info, gResType); - - Value* addr = wLocAddr; - if( wLocAddr->getType() != voidPtrTy ) { - addr = CastInst::CreatePointerCast(addr, - voidPtrTy, - "", call); - } - // Create a temporary with the locale value. - Value* localePtr = makeAlloca(info->localeIdType, "", call); - new StoreInst(locale, localePtr, call); - Value* args[2]; - args[0] = localePtr; - args[1] = addr; - Instruction* make = CallInst::Create(makeFn, args, "", call); - if( make->getType() != wResType ) { - make = CastInst::CreatePointerCast(make, - wResType, - "", call); - } - assert(make->getType()->isPointerTy()); + + Instruction* make = createWideAddr(info, locale, wLocAddr, + wResType, call); make = callWideToGlobalFn(make, gResType, call); @@ -802,6 +912,11 @@ namespace { Value* gDst = call->getArgOperand(0); Value* gSrc = call->getArgOperand(1); Value* n = call->getArgOperand(2); + { + // Convert n if necessary + IRBuilder<> builder(call); + n = builder.CreateZExtOrTrunc(n, sizeTy); + } dstSpace = gDst->getType()->getPointerAddressSpace(); srcSpace = gSrc->getType()->getPointerAddressSpace(); @@ -826,36 +941,32 @@ namespace { if( dstSpace == info->globalSpace && srcSpace != info->globalSpace ) { // It's a PUT - Value* args[4]; - args[0] = createWideBitCast(info, wDst, - wideVoidPtrTy, - call); - args[1] = wSrc; - args[2] = n; - args[3] = ctl; + Value* args[5]; + args[0] = createRnode(info, wDst, call); + args[1] = createRaddr(info, wDst, call); + args[2] = wSrc; + args[3] = n; + 
args[4] = ctl; putget = CallInst::Create(putFn, args, "", call); } else if( srcSpace == info->globalSpace && dstSpace != info->globalSpace ) { // It's a GET - Value* args[4]; + Value* args[5]; args[0] = wDst; - args[1] = createWideBitCast(info, wSrc, - wideVoidPtrTy, - call); - args[2] = n; - args[3] = ctl; + args[1] = createRnode(info, wSrc, call); + args[2] = createRaddr(info, wSrc, call); + args[3] = n; + args[4] = ctl; putget = CallInst::Create(getFn, args, "", call); } else { - Value* args[3]; - args[0] = createWideBitCast(info, wDst, - wideVoidPtrTy, - call); - args[1] = createWideBitCast(info, wSrc, - wideVoidPtrTy, - call); - args[2] = n; + Value* args[5]; + args[0] = createRnode(info, wDst, call); + args[1] = createRaddr(info, wDst, call); + args[2] = createRnode(info, wSrc, call); + args[3] = createRaddr(info, wSrc, call); + args[4] = n; assert(getPutFn && "Missing get-put-function for global-to-global memcpy"); putget = CallInst::Create(getPutFn, args, "", call); @@ -867,17 +978,21 @@ namespace { Value* gDst = call->getArgOperand(0); Value* c = call->getArgOperand(1); Value* n = call->getArgOperand(2); + { + // Convert n if necessary + IRBuilder<> builder(call); + n = builder.CreateZExtOrTrunc(n, sizeTy); + } Value* wDst = callGlobalToWideFn(gDst, call); Instruction* mset = NULL; - Value* args[3]; - args[0] = createWideBitCast(info, wDst, - wideVoidPtrTy, - call); - args[1] = c; - args[2] = n; + Value* args[4]; + args[0] = createRnode(info, wDst, call); + args[1] = createRaddr(info, wDst, call); + args[2] = c; + args[3] = n; assert(memsetFn && "Missing memset-function for global memset"); mset = CallInst::Create(memsetFn, args, "", call); myReplaceInstWithInst(call, mset); @@ -923,7 +1038,7 @@ namespace { // TODO -- remove the bitcasts we added earlier. assert(0); } - } + } // Otherwise, return NULL to indicate we opted out // of modifying the constant directly. 
return NULL; @@ -936,7 +1051,7 @@ namespace { RemapFlags Flags, TypeFixer *TypeMapper) { ValueToValueMapTy::iterator I = VM.find(C); - + // If the value already exists in the map, use it. if (I != VM.end() && I->second) return cast(I->second); @@ -990,7 +1105,7 @@ namespace { RemapFlags Flags, TypeFixer *TypeMapper) { ValueToValueMapTy::iterator I = VM.find(V); - + // If the value already exists in the map, use it. if (I != VM.end() && I->second) return I->second; @@ -1029,7 +1144,7 @@ namespace { // Check for it in the map. ValueToValueMapTy::iterator I = VM.find(V); - + // If the value already exists in the map, use it. if (I != VM.end() && I->second) newV = I->second; else { @@ -1099,7 +1214,6 @@ namespace { */ GlobalToWide(GlobalToWideInfo* _info, std::string layout) : ModulePass(ID), info(_info), layoutAfterwards(layout), - //wideAddrGEPer(), wideLocGEPer(), debugPassOne(false), debugPassTwo(false) { @@ -1120,9 +1234,9 @@ namespace { bool madeInfo = false; if( debugThisFn[0] || debugAllPassOne || debugAllPassTwo ) { - errs() << "GlobalToWide: "; - errs().write_escaped(M.getModuleIdentifier()) << '\n'; - errs().write_escaped(M.getTargetTriple()) << '\n'; + dbgs() << "GlobalToWide: "; + dbgs().write_escaped(M.getModuleIdentifier()) << '\n'; + dbgs().write_escaped(M.getTargetTriple()) << '\n'; } // Normally we expect a user of this optimization to have @@ -1135,6 +1249,7 @@ namespace { madeInfo = true; info->globalSpace = 100; info->wideSpace = 101; + info->globalPtrBits = 128; info->localeIdType = M.getTypeByName("struct.c_localeid_t"); if( ! 
info->localeIdType ) { StructType* t = StructType::create(M.getContext(), "struct.c_localeid_t"); @@ -1168,7 +1283,8 @@ namespace { Type* gType = FT->getParamType(0); GlobalPointerInfo & r = info->gTypes[gType]; r.addrFn = F; - info->specialFunctions.insert(F); + //printf("Adding %s\n", F->getName().str().c_str()); + info->specialFunctions.push_back(F); } else if( F->getName().startswith(GLOBAL_FN_GLOBAL_LOCID) && FT->getNumParams() == 1 && FT->getReturnType() == info->localeIdType && @@ -1176,7 +1292,8 @@ namespace { Type* gType = FT->getParamType(0); GlobalPointerInfo & r = info->gTypes[gType]; r.locFn = F; - info->specialFunctions.insert(F); + info->specialFunctions.push_back(F); + //printf("Adding %s\n", F->getName().str().c_str()); } else if( F->getName().startswith(GLOBAL_FN_GLOBAL_NODEID) && FT->getNumParams() == 1 && FT->getReturnType() == info->nodeIdType && @@ -1184,7 +1301,8 @@ namespace { Type* gType = FT->getParamType(0); GlobalPointerInfo & r = info->gTypes[gType]; r.nodeFn = F; - info->specialFunctions.insert(F); + info->specialFunctions.push_back(F); + //printf("Adding %s\n", F->getName().str().c_str()); } else if( F->getName().startswith(GLOBAL_FN_GLOBAL_MAKE) && FT->getNumParams() == 2 && FT->getParamType(0) == info->localeIdType && @@ -1194,21 +1312,24 @@ namespace { Type* gType = FT->getReturnType(); GlobalPointerInfo & r = info->gTypes[gType]; r.makeFn = F; - info->specialFunctions.insert(F); + info->specialFunctions.push_back(F); + //printf("Adding %s\n", F->getName().str().c_str()); } else if( F->getName().startswith(GLOBAL_FN_GLOBAL_TO_WIDE) && FT->getNumParams() == 1 && containsGlobalPointers(info, FT->getParamType(0)) ) { Type* gType = FT->getParamType(0); GlobalPointerInfo & r = info->gTypes[gType]; r.globalToWideFn = F; - info->specialFunctions.insert(F); + info->specialFunctions.push_back(F); + //printf("Adding %s\n", F->getName().str().c_str()); } else if( F->getName().startswith(GLOBAL_FN_WIDE_TO_GLOBAL) && FT->getNumParams() == 1 && 
containsGlobalPointers(info, FT->getReturnType()) ) { Type* gType = FT->getReturnType(); GlobalPointerInfo & r = info->gTypes[gType]; r.wideToGlobalFn = F; - info->specialFunctions.insert(F); + info->specialFunctions.push_back(F); + //printf("Adding %s\n", F->getName().str().c_str()); } } } @@ -1220,11 +1341,30 @@ namespace { // Wide pointer address space must differ from the local one... assert(info->globalSpace != 0); assert(info->wideSpace != 0); + assert(info->globalPtrBits != 0); assert(info->localeIdType != 0); assert(info->nodeIdType != 0); + // Check that a pointer in the global address space has the correct size. + { + const llvm::DataLayout& dl = M.getDataLayout(); + llvm::Type* testGlobalTy = llvm::Type::getInt8PtrTy(M.getContext(), + info->globalSpace); + llvm::Type* testWideTy = llvm::Type::getInt8PtrTy(M.getContext(), + info->wideSpace); + + bool ok = (dl.getTypeSizeInBits(testGlobalTy) == info->globalPtrBits) && + (dl.getTypeSizeInBits(testWideTy) == info->globalPtrBits); + if (!ok) { + printf("Error: llvmGlobalToWide pass doesn't match DataLayout\n"); + printf("module DataLayout is %s\n", + dl.getStringRepresentation().c_str()); + assert(ok); + } + } + GlobalTypeFixer fixer(M, info, debugPassTwo); - GlobalTypeFixer* TypeMapper = &fixer; + GlobalTypeFixer* TypeMapper = &fixer; /* This transformation operates in two major parts parts: * - (as a prerequisite, source that has global address space pointers, @@ -1238,7 +1378,7 @@ namespace { * to call put/get functions and lower all instructions to operate * exclusively on wide types, so that the global types are no * longer used (even if they still exist in the LLVM context). 
- */ + */ for (Module::iterator next_func = M.begin(); next_func!= M.end(); ) { @@ -1251,13 +1391,13 @@ namespace { } if( debugPassOne ) { - errs() << "==================================== start pass one\n"; - errs() << "starting pass one with function "; - errs().write_escaped(F->getName()) << '\n'; + dbgs() << "==================================== start pass one\n"; + dbgs() << "starting pass one with function "; + dbgs().write_escaped(F->getName()) << '\n'; } // skip the special functions like wideToGlobal - if (info->specialFunctions.count(F)) { + if (F->getName().startswith(GLOBAL_FN)) { continue; } @@ -1279,7 +1419,7 @@ namespace { std::vector Params; unsigned i = 0; - for (Function::arg_iterator I = F->arg_begin(), + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I!=E; ++I, ++i) { if (containsGlobalPointers(info, I->getType())) { Type *new_type = convertTypeGlobalToWide(&M, info, I->getType()); @@ -1290,12 +1430,12 @@ namespace { } } - + if( debugPassOne ) { // Wait until we have converted the argument types since // we might rename them... before dumping the fn. 
- F->dump(); - errs() << "-----------------------------\n"; + F->print(dbgs(), nullptr, false, true); + dbgs() << "\n-----------------------------\n"; } // if we don't need to update the function's return value or at least @@ -1325,6 +1465,32 @@ namespace { Function *NF = Function::Create(NFTy, F->getLinkage()); NF->copyAttributesFrom(F); + if (update_return) { + // if it's no longer a pointer, remove pointer-based attributes +#if HAVE_LLVM_VER >= 50 + NF->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(RetTy)); +#else + NF->removeAttributes(AttributeSet::ReturnIndex, + AttributeSet::get(NF->getContext(), + AttributeSet::ReturnIndex, + AttributeFuncs::typeIncompatible(RetTy))); +#endif + } + if (update_parameters) { + for (size_t i = 0; i < Params.size(); i++ ) { +#if HAVE_LLVM_VER >= 50 + NF->removeAttributes(i+1, + AttributeFuncs::typeIncompatible(Params[i])); +#else + NF->removeAttributes(i+1, + AttributeSet::get(NF->getContext(), + i+1, + AttributeFuncs::typeIncompatible(Params[i]))); +#endif + } + } + #if HAVE_LLVM_VER >= 38 F->getParent()->getFunctionList().insert(F->getIterator(), NF); #else @@ -1348,8 +1514,8 @@ namespace { ++UI; CallSite CS(Old); if (CS.getInstruction()) { - assert(CS.getCalledFunction() == F); - Instruction *Call = CS.getInstruction(); + assert(CS.getCalledFunction() == F); + Instruction *Call = CS.getInstruction(); #if HAVE_LLVM_VER >= 50 const AttributeList &CallPAL = CS.getAttributes(); #else @@ -1373,7 +1539,7 @@ namespace { } } - + // replace_with = add a new call Instruction *New; @@ -1464,10 +1630,11 @@ namespace { nfArg->setName(arg->getName()); // + "_wide"); New->takeName(arg); // AA.replaceWithNewValue(I, New); + } if (containsGlobalPointers(info, F->getReturnType())) { - for (Function::iterator BB = NF->begin(), E = NF->end(); + for (Function::iterator BB = NF->begin(), E = NF->end(); BB != E; ++BB) { if (ReturnInst *RI = dyn_cast(BB->getTerminator())) { Instruction *New; @@ -1476,13 +1643,14 @@ 
namespace { BB->getInstList().erase(RI); } } - } - + } + // DEBUG: verify function if( debugPassOne ) { - errs() << "verifying new function after pass one: "; - errs().write_escaped(NF->getName()) << '\n'; - NF->dump(); + dbgs() << "verifying new function after pass one: "; + dbgs().write_escaped(NF->getName()) << '\n'; + NF->print(dbgs(), nullptr, false, true); + dbgs() << '\n'; } if( extraChecks ) { #if HAVE_LLVM_VER >= 35 @@ -1496,7 +1664,7 @@ namespace { F->eraseFromParent(); if( debugPassOne ) { - errs() << "==================================== end pass one fn\n"; + dbgs() << "==================================== end pass one fn\n"; } } @@ -1609,12 +1777,15 @@ namespace { if( F->begin() == F->end() ) continue; if( debugPassTwo ) { - errs() << "Pass 2.1 to function ---------- "; - errs().write_escaped(F->getName()) << '\n'; - if( debugPassTwo ) F->dump(); - errs() << "-----------------------------\n"; + dbgs() << "Pass 2.1 to function ---------- "; + dbgs().write_escaped(F->getName()) << '\n'; + if( debugPassTwo ) { + F->print(dbgs(), nullptr, false, true); + dbgs() << '\n'; + } + dbgs() << "-----------------------------\n"; } - + /* for all functions for all basic blocks @@ -1649,7 +1820,7 @@ namespace { } ++I; - + fixer.fixInstruction(insn); if( debugPassTwo ) { @@ -1663,7 +1834,7 @@ namespace { for( ; J != I; ++J ) { Instruction *new_insn = &*J; if( new_insn != prev && new_insn != insn ) - errs() << " new|" << *new_insn << "|" << "\n"; + dbgs() << " new|" << *new_insn << "|" << "\n"; } } } @@ -1678,22 +1849,22 @@ namespace { } if( debugPassTwo ) { - errs() << "After rewriting global ops, function is: "; - errs().write_escaped(F->getName()) << '\n'; - F->dump(); - errs() << "-----------------------------\n"; - errs() << "Now pass 2.2 mapping w2g and g2w: \n"; + dbgs() << "After rewriting global ops, function is: "; + dbgs().write_escaped(F->getName()) << '\n'; + F->print(dbgs(), nullptr, false, true); + dbgs() << "\n-----------------------------\n"; + dbgs() 
<< "Now pass 2.2 mapping w2g and g2w: \n"; } for (Function::iterator BI = F->begin(), BE = F->end(); BI != BE; ) { BasicBlock& BBRef = *BI; BasicBlock* BB = &BBRef; ++BI; - + //if( debugPassTwo ) { - // errs() << BB->getName() << ":\n"; + // dbgs() << BB->getName() << ":\n"; //} - + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *insn = &*I; ++I; @@ -1715,14 +1886,14 @@ namespace { } if( debugPassTwo ) { - errs() << "Now pass 2.3 remapping instructions\n"; + dbgs() << "Now pass 2.3 remapping instructions\n"; } for (Function::iterator BI = F->begin(), BE = F->end(); BI != BE; ) { BasicBlock& BBRef = *BI; BasicBlock* BB = &BBRef; ++BI; - + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *insn = &*I; ++I; @@ -1739,7 +1910,7 @@ namespace { insn->setName(""); insn->dropAllReferences(); } - + // Delete any junk we have accumulated... // we should have removed all references to global fn's. // references. @@ -1747,28 +1918,34 @@ namespace { Junk.clear(); if( debugPassTwo ) { - errs() << "AFTER PASS 2 the function is:\n"; + dbgs() << "AFTER PASS 2 the function is:\n"; for (Function::iterator BI = F->begin(), BE = F->end(); BI != BE; ) { BasicBlock& BBRef = *BI; BasicBlock* BB = &BBRef; ++BI; - + if( debugPassTwo ) { - errs() << BB->getName() << ":\n"; + dbgs() << BB->getName() << ":\n"; } - + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *insn = &*I; ++I; - errs() << " |" << *insn << "|" << "\n"; + dbgs() << " |" << *insn << "|" << "\n"; } } } if( extraChecks ) { #if HAVE_LLVM_VER >= 35 - assert(!verifyFunction(*F, &errs())); + bool ok = !verifyFunction(*F, &errs()); + if (!ok) { + dbgs() << "\n"; + F->print(dbgs(), nullptr, false, true); + dbgs() << F->getName() << "\n"; + assert( 0 && "Verify function failed"); + } #else verifyFunction(*F); #endif @@ -1781,9 +1958,13 @@ namespace { for(specialFunctions_t::iterator I = info->specialFunctions.begin(), E = 
info->specialFunctions.end(); I != E; ++I ) { - Function* F = *I; - assert( F->use_empty() && "Special functions should've been replaced"); - F->eraseFromParent(); + + Value* v = *I; + if (v) { + Function* F = llvm::cast(v); + assert( F->use_empty() && "Special functions should've been replaced"); + F->eraseFromParent(); + } } // Delete the dummy dependencies preserving function @@ -1791,7 +1972,11 @@ namespace { if( cf ) { Function* f = dyn_cast(cf); if( f ) { +#if HAVE_LLVM_VER >= 50 + info->preservingFn.setValPtr(NULL); +#else info->preservingFn = NULL; +#endif f->eraseFromParent(); } } @@ -1814,10 +1999,6 @@ static RegisterPass X("global-to-wide", "GlobalToWide Pass"); ModulePass *createGlobalToWide(GlobalToWideInfo* info, std::string setLayout) { - assert(info->addrFn); - assert(info->locFn); - assert(info->nodeFn); - assert(info->makeFn); assert(info->getFn); assert(info->putFn); assert(info->getPutFn); @@ -1898,7 +2079,8 @@ void populateFunctionsForGlobalType(Module *module, GlobalToWideInfo* info, Type GLOBAL_FN_GLOBAL_ADDR, module); r.addrFn->setDoesNotAccessMemory(); - info->specialFunctions.insert(r.addrFn); + info->specialFunctions.push_back(r.addrFn); + //printf("Adding %s\n", r.addrFn->getName().str().c_str()); } if( ! r.locFn ) { @@ -1910,7 +2092,8 @@ void populateFunctionsForGlobalType(Module *module, GlobalToWideInfo* info, Type GLOBAL_FN_GLOBAL_LOCID, module); r.locFn->setDoesNotAccessMemory(); - info->specialFunctions.insert(r.locFn); + info->specialFunctions.push_back(r.locFn); + //printf("Adding %s\n", r.locFn->getName().str().c_str()); } if( ! 
r.nodeFn ) { @@ -1922,7 +2105,8 @@ void populateFunctionsForGlobalType(Module *module, GlobalToWideInfo* info, Type GLOBAL_FN_GLOBAL_NODEID, module); r.nodeFn->setDoesNotAccessMemory(); - info->specialFunctions.insert(r.nodeFn); + info->specialFunctions.push_back(r.nodeFn); + //printf("Adding %s\n", r.nodeFn->getName().str().c_str()); } @@ -1936,7 +2120,8 @@ void populateFunctionsForGlobalType(Module *module, GlobalToWideInfo* info, Type GLOBAL_FN_GLOBAL_MAKE, module); r.makeFn->setDoesNotAccessMemory(); - info->specialFunctions.insert(r.makeFn); + info->specialFunctions.push_back(r.makeFn); + //printf("Adding %s\n", r.makeFn->getName().str().c_str()); } } @@ -1982,7 +2167,8 @@ void populateFunctionsForGlobalToWideType(Module *module, GlobalToWideInfo* info GLOBAL_FN_GLOBAL_TO_WIDE, module); r.globalToWideFn->setDoesNotAccessMemory(); - info->specialFunctions.insert(r.globalToWideFn); + info->specialFunctions.push_back(r.globalToWideFn); + //printf("Adding %s\n", r.globalToWideFn->getName().str().c_str()); } if( ! 
r.wideToGlobalFn ) { @@ -1994,7 +2180,8 @@ void populateFunctionsForGlobalToWideType(Module *module, GlobalToWideInfo* info GLOBAL_FN_WIDE_TO_GLOBAL, module); r.wideToGlobalFn->setDoesNotAccessMemory(); - info->specialFunctions.insert(r.wideToGlobalFn); + info->specialFunctions.push_back(r.wideToGlobalFn); + //printf("Adding %s\n", r.globalToWideFn->getName().str().c_str()); } } @@ -2011,6 +2198,17 @@ llvm::Function* getWideToGlobalFn(llvm::Module *module, GlobalToWideInfo* info, return r.wideToGlobalFn; } +static +Type* createWidePointerToType(Module* module, GlobalToWideInfo* i, Type* eltTy) +{ + LLVMContext& context = module->getContext(); + // Get the wide pointer struct containing {locale, address} + Type* fields[2]; + fields[0] = i->localeIdType; + fields[1] = PointerType::get(eltTy, 0); + + return StructType::get(context, fields, false); +} Type* convertTypeGlobalToWide(Module* module, GlobalToWideInfo* info, Type* t) { @@ -2081,7 +2279,7 @@ Type* convertTypeGlobalToWide(Module* module, GlobalToWideInfo* info, Type* t) wideArgTypes.resize(fnType->getNumParams()); int i = 0; - for (FunctionType::param_iterator A = fnType->param_begin(), + for (FunctionType::param_iterator A = fnType->param_begin(), A_end = fnType->param_end(); A != A_end; ++A, i++){ Type* param_type = *A; wideArgTypes[i] = convertTypeGlobalToWide(module, info, param_type); @@ -2097,13 +2295,8 @@ Type* convertTypeGlobalToWide(Module* module, GlobalToWideInfo* info, Type* t) if( t->getPointerAddressSpace() == info->globalSpace || t->getPointerAddressSpace() == info->wideSpace ) { - // (old) Replace the pointer with a struct containing {locale, address} - //Type* fields[2]; - //fields[0] = info->localeIdType; - //fields[1] = PointerType::get(wideEltType, 0); - //return StructType::get(context, fields, false); - // Replace the pointer with a normal (packed) pointer. 
- return PointerType::get(wideEltType, 0); + // Replace the pointer with a struct containing {locale, address} + return createWidePointerToType(module, info, wideEltType); } else { return PointerType::get(wideEltType, t->getPointerAddressSpace()); } @@ -2117,7 +2310,7 @@ Type* convertTypeGlobalToWide(Module* module, GlobalToWideInfo* info, Type* t) return ArrayType::get(wideEltType, arrTy->getNumElements()); } - + if (t->isVectorTy()){ VectorType *vecTy = cast(t); Type *eltType = vecTy->getElementType(); @@ -2126,7 +2319,7 @@ Type* convertTypeGlobalToWide(Module* module, GlobalToWideInfo* info, Type* t) return VectorType::get(wideEltType, vecTy->getNumElements()); } - + assert(0); } diff --git a/compiler/util/llvmUtil.cpp b/compiler/util/llvmUtil.cpp index 5ba30f20a373..bc98b388cc35 100644 --- a/compiler/util/llvmUtil.cpp +++ b/compiler/util/llvmUtil.cpp @@ -66,7 +66,7 @@ llvm::Constant* codegenSizeofLLVM(llvm::Type* type) } static -bool isTypeEquivalent(LLVM_TARGET_DATA * targetData, llvm::Type* a, llvm::Type* b, bool force) +bool isTypeEquivalent(const llvm::DataLayout& layout, llvm::Type* a, llvm::Type* b, bool force) { int64_t aN = arrayVecN(a); int64_t bN = arrayVecN(a); @@ -88,10 +88,10 @@ bool isTypeEquivalent(LLVM_TARGET_DATA * targetData, llvm::Type* a, llvm::Type* } - alignA = targetData->getPrefTypeAlignment(a); - alignB = targetData->getPrefTypeAlignment(b); - sizeA = targetData->getTypeStoreSize(a); - sizeB = targetData->getTypeStoreSize(b); + alignA = layout.getPrefTypeAlignment(a); + alignB = layout.getPrefTypeAlignment(b); + sizeA = layout.getTypeStoreSize(a); + sizeB = layout.getTypeStoreSize(b); // Are they the same size? 
if( sizeA == sizeB ) return true; @@ -176,7 +176,7 @@ llvm::Value* createTempVarLLVM(llvm::IRBuilder<>* builder, llvm::Type* type, con llvm::Value *convertValueToType( llvm::IRBuilder<> *builder, - LLVM_TARGET_DATA * targetData, + const llvm::DataLayout& layout, llvm::Value *value, llvm::Type *newType, bool isSigned, @@ -252,12 +252,12 @@ llvm::Value *convertValueToType( // This is important in order to handle clang structure expansion // (e.g. calling a function that returns {int64,int64}) if( isArrayVecOrStruct(curType) || isArrayVecOrStruct(newType) ) { - if( isTypeEquivalent(targetData, curType, newType, force) ) { + if( isTypeEquivalent(layout, curType, newType, force) ) { // We turn it into a store/load to convert the type // since LLVM does not allow bit casts on structure types. llvm::Value* tmp_alloc; - if( targetData->getTypeStoreSize(newType) >= - targetData->getTypeStoreSize(curType) ) + if( layout.getTypeStoreSize(newType) >= + layout.getTypeStoreSize(curType) ) tmp_alloc = createTempVarLLVM(builder, newType, ""); else { tmp_alloc = createTempVarLLVM(builder, curType, ""); @@ -465,16 +465,16 @@ PromotedPair convertValuesToLarger( return PromotedPair(NULL, NULL, false); } -int64_t getTypeSizeInBytes(LLVM_TARGET_DATA * layout, llvm::Type* ty) +int64_t getTypeSizeInBytes(const llvm::DataLayout& layout, llvm::Type* ty) { if( ! ty->isSized() ) return -1; // who knows how big it is! - int64_t sz = layout->getTypeSizeInBits(ty); + int64_t sz = layout.getTypeSizeInBits(ty); sz = (sz + 7)/8; // now in bytes. return sz; } -bool isTypeSizeSmallerThan(LLVM_TARGET_DATA * layout, llvm::Type* ty, uint64_t max_size_bytes) +bool isTypeSizeSmallerThan(const llvm::DataLayout& layout, llvm::Type* ty, uint64_t max_size_bytes) { if( ! ty->isSized() ) return false; // who knows how big it is! 
@@ -493,7 +493,7 @@ bool isTypeSizeSmallerThan(LLVM_TARGET_DATA * layout, llvm::Type* ty, uint64_t m (so that it includes padding) */ static -uint64_t doGetTypeFieldNext(LLVM_TARGET_DATA * layout, llvm::Type* ty, uint64_t offset, uint64_t parent_this_offset, uint64_t parent_next_offset) +uint64_t doGetTypeFieldNext(const llvm::DataLayout& layout, llvm::Type* ty, uint64_t offset, uint64_t parent_this_offset, uint64_t parent_next_offset) { llvm::SequentialType* stype = NULL; llvm::StructType* struct_type = NULL; @@ -502,7 +502,7 @@ uint64_t doGetTypeFieldNext(LLVM_TARGET_DATA * layout, llvm::Type* ty, uint64_t unsigned i, n; uint64_t local_offset, next_offset_here, offset_here; - //ty->dump(); + //ty->print(dbgs(), true); //printf("offset %i parent %i,%i\n", (int) offset, (int) parent_this_offset, (int) parent_next_offset); assert(parent_this_offset <= offset && offset <= parent_next_offset); @@ -519,7 +519,7 @@ uint64_t doGetTypeFieldNext(LLVM_TARGET_DATA * layout, llvm::Type* ty, uint64_t // Not using getTypeSizeInBytes so that: // 1) we get an assertion error if the type is not sized // 2) we use uint64s for the type instead of int64s - uint64_t sz = layout->getTypeSizeInBits(eltType); + uint64_t sz = layout.getTypeSizeInBits(eltType); sz = (sz + 7)/8; // now in bytes. uint64_t this_offset = local_offset / sz; this_offset = parent_this_offset + this_offset*sz; @@ -532,7 +532,7 @@ uint64_t doGetTypeFieldNext(LLVM_TARGET_DATA * layout, llvm::Type* ty, uint64_t // structure type. struct_type = llvm::cast(ty); n = struct_type->getNumElements(); - struct_layout = layout->getStructLayout(struct_type); + struct_layout = layout.getStructLayout(struct_type); // Scroll forward in the structure until we find the last element // starting at offset. 
@@ -563,7 +563,7 @@ uint64_t doGetTypeFieldNext(LLVM_TARGET_DATA * layout, llvm::Type* ty, uint64_t } -uint64_t getTypeFieldNext(LLVM_TARGET_DATA * layout, llvm::Type* ty, uint64_t offset) +uint64_t getTypeFieldNext(const llvm::DataLayout& layout, llvm::Type* ty, uint64_t offset) { uint64_t sz; @@ -571,7 +571,7 @@ uint64_t getTypeFieldNext(LLVM_TARGET_DATA * layout, llvm::Type* ty, uint64_t of // 1) we get an assertion error if the type is not sized // 2) we use uint64s for the type instead of int64s assert(ty->isSized()); - sz = layout->getTypeSizeInBits(ty); + sz = layout.getTypeSizeInBits(ty); sz = (sz + 7)/8; // now in bytes. diff --git a/doc/rst/technotes/llvm.rst b/doc/rst/technotes/llvm.rst index 0564ca44268c..651ce4ce1dc6 100644 --- a/doc/rst/technotes/llvm.rst +++ b/doc/rst/technotes/llvm.rst @@ -24,7 +24,7 @@ Building the LLVM support ------------------------- To build the compiler with LLVM support for extern blocks, ``--llvm`` code -generation, but no support for ``--llvm-wide-opt``: +generation, and support for ``--llvm-wide-opt``: .. code-block:: sh @@ -35,18 +35,6 @@ generation, but no support for ``--llvm-wide-opt``: make # you might want to do e.g. make -j 16 for a parallel build -To build the compiler with LLVM support for extern blocks, ``--llvm code`` -generation, and also support ``--llvm-wide-opt``: - -.. code-block:: sh - - source ./util/setchplenv.bash - export CHPL_LLVM=llvm - export CHPL_WIDE_POINTERS=node16 # optional but useful with --llvm-wide-opt - # see discussion below - - make # you might want to do e.g. make -j 16 for a parallel build - Note: * If you have a built llvm in ``third-party/llvm/install``, even if you forget @@ -81,27 +69,15 @@ Passing ``--fast`` will cause LLVM optimizations to run. The ``--ccflags`` option can control which LLVM optimizations are run, using the same syntax as flags to clang. 
-Additionally, if you build your compiler with ``CHPL_WIDE_POINTERS=node16`` and -then compile a program with ``--llvm --llvm-wide-opt --fast``, you will allow -LLVM optimizations to work with global memory. For example, the Loop Invariant -Code Motion (LICM) optimization might be able to hoist an access of a remote -variable - ie, a 'get' - out of a loop. This optimization has produced better -performance with some benchmarks. - -CHPL_WIDE_POINTERS=node16 is necessary to use ``--llvm-wide-opt`` because of -historical limitations in LLVM support for pointers in different address spaces -having different sizes. The default ``CHPL_WIDE_POINTERS=struct`` uses a -128-bit wide pointer, but ``CHPL_WIDE_POINTERS=node16`` packs wide pointers -into a pointer-sized value (normally 64 bits) which includes 16 bits of node -number in order to avoid these problems. We plan to remove this requirement in -the future to enable ``--llvm-wide-opt`` to work with -``CHPL_WIDE_POINTERS=struct``. +Additionally, if you compile a program with ``--llvm --llvm-wide-opt +--fast``, you will allow LLVM optimizations to work with global memory. +For example, the Loop Invariant Code Motion (LICM) optimization might be +able to hoist an access of a remote variable - ie, a 'get' - out of a +loop. This optimization has produced better performance with some +benchmarks. Caveats: -* ``--llvm-wide-opt`` currently requires ``CHPL_WIDE_POINTERS=node16``, which - means that it will not work with the numa locale model since packed pointers - will not have a sublocale. 
* ``--llvm-wide-opt`` may add communication to or from a task's stack, so it may not function correctly for combinations of tasking and communication layers in which some task has a stack outside of an acceptable region for diff --git a/doc/rst/usingchapel/chplenv.rst b/doc/rst/usingchapel/chplenv.rst index 3f97fdcd7a05..4d7dd7bc9a69 100644 --- a/doc/rst/usingchapel/chplenv.rst +++ b/doc/rst/usingchapel/chplenv.rst @@ -638,39 +638,6 @@ CHPL_LLVM # Ubuntu 16.04 apt-get install llvm-3.7-dev llvm-3.7 clang-3.7 libclang-3.7-dev libedit-dev - -.. _readme-chplenv.CHPL_WIDE_POINTERS: - -CHPL_WIDE_POINTERS -~~~~~~~~~~~~~~~~~~ - Optionally, the ``CHPL_WIDE_POINTERS`` environment variable can be used to - specify the wide pointer format for multilocale programs. Current options - are: - - ======== ============================================================= - Value Description - ======== ============================================================= - struct store wide pointers in structures which may span more than - one word - nodeN ("N" a number, 2 <= N <= 60) store wide pointers in single - words, with N bits used to store the node (top level locale) - number and the rest containing the address on that node - ======== ============================================================= - - ``CHPL_WIDE_POINTERS`` is used to select between two modes of operation. One is - universally applicable; the other has restricted applicability but may - reduce remote communication. - - If unset, ``CHPL_WIDE_POINTERS`` defaults to ``struct``. This setting works in - all situations and in particular, it is compatible with all locale models - including the hierarchical ones. The ``nodeN`` option does not work with - hierarchical locale models and is only useful with the LLVM backend, which - is currently experimental. However, when used, it allows LLVM to understand - and optimize remote transfers, potentially reducing the amount of - communication a program performs. 
See :ref:`readme-llvm` for more - information about ``CHPL_WIDE_POINTERS=nodeN``. - - .. _readme-chplenv.CHPL_UNWIND: CHPL_UNWIND diff --git a/man/chpl.rst b/man/chpl.rst index 4018c27d9c3c..7b6383b6d234 100644 --- a/man/chpl.rst +++ b/man/chpl.rst @@ -687,12 +687,6 @@ OPTIONS corresponds with and overrides the $CHPL\_TIMERS environment variable (defaults to 'generic'). -**--wide-pointers ** - - Specify the wide pointer format format. This flag corresponds with and - overrides the $CHPL\_WIDE\_POINTERS environment variable (defaults to - 'struct'). - *Compiler Information Options* **--copyright** diff --git a/modules/internal/ChapelEnv.chpl b/modules/internal/ChapelEnv.chpl index 4e422d87acc9..6e115fc12f03 100644 --- a/modules/internal/ChapelEnv.chpl +++ b/modules/internal/ChapelEnv.chpl @@ -102,9 +102,6 @@ module ChapelEnv { /* See :ref:`readme-chplenv.CHPL_REGEXP` for more information. */ param CHPL_REGEXP:string = __primitive("get compiler variable", "CHPL_REGEXP"); - /* See :ref:`readme-chplenv.CHPL_WIDE_POINTERS` for more information. */ - param CHPL_WIDE_POINTERS:string = __primitive("get compiler variable", "CHPL_WIDE_POINTERS"); - /* See :ref:`readme-chplenv.CHPL_LLVM` for more information. 
*/ param CHPL_LLVM:string = __primitive("get compiler variable", "CHPL_LLVM"); } diff --git a/modules/packages/ZMQ.chpl b/modules/packages/ZMQ.chpl index 78156b89f2af..bf49eddd624e 100644 --- a/modules/packages/ZMQ.chpl +++ b/modules/packages/ZMQ.chpl @@ -267,7 +267,8 @@ module ZMQ { use Reflection; use ExplicitRefCount; - private extern var errno: c_int; + private extern proc chpl_macro_int_errno():c_int; + private inline proc errno return chpl_macro_int_errno():c_int; // Types pragma "no doc" diff --git a/runtime/etc/Makefile.launcher b/runtime/etc/Makefile.launcher index 11f7f971e6e8..aec3a24c28a2 100644 --- a/runtime/etc/Makefile.launcher +++ b/runtime/etc/Makefile.launcher @@ -46,7 +46,7 @@ all: FORCE echo "#include \"chpl_compilation_config.c\"" >> $(LAUNCHER_SRC_NAME) echo "const char launcher_real_suffix[] = \"$(REAL_SUFFIX)\";" >> $(LAUNCHER_SRC_NAME) echo "const char launcher_exe_suffix[] = \"$(EXE_SUFFIX)\";" >> $(LAUNCHER_SRC_NAME) - $(CC) $(GEN_CFLAGS) $(COMP_GEN_CFLAGS) -c -o $(TMPBINNAME)_launcher.o $(CHPL_LN_INCS) -I. $(LAUNCHER_SRC_NAME) + $(CC) $(GEN_CFLAGS) $(COMP_GEN_CFLAGS) -c -o $(TMPBINNAME)_launcher.o -DLAUNCHER $(CHPL_LN_INCS) -I. 
$(LAUNCHER_SRC_NAME) $(LD) $(GEN_LFLAGS) $(COMP_GEN_LFLAGS) -o $(TMPBINNAME)_launcher -L$(CHPL_LN_LIB_DIR) $(TMPBINNAME)_launcher.o $(CHPL_LN_LIB_DIR)/main_launcher.o -lchpllaunch -lm $(LAUNCH_LIBS) cp $(TMPBINNAME)_launcher $(TMPBINNAME) diff --git a/runtime/include/chpl-comm-compiler-llvm-support.h b/runtime/include/chpl-comm-compiler-llvm-support.h index 473493b54361..e765fe6024a9 100644 --- a/runtime/include/chpl-comm-compiler-llvm-support.h +++ b/runtime/include/chpl-comm-compiler-llvm-support.h @@ -36,18 +36,18 @@ // atomic_ordering = loadOrStoreInst->getOrdering() static inline -void chpl_gen_comm_get_ctl(void *dst_addr, wide_ptr_t src, int64_t n, int64_t ctl) +void chpl_gen_comm_get_ctl(void *dst_addr, + c_nodeid_t src_node, void *src_addr, + uintptr_t n, int64_t ctl) { - c_nodeid_t src_node = chpl_wide_ptr_get_node(src); - void* src_addr = chpl_wide_ptr_get_address(src); chpl_gen_comm_get(dst_addr, src_node, src_addr, sizeof(uint8_t)*n, CHPL_TYPE_uint8_t, CHPL_COMM_UNKNOWN_ID, -1, 0); } static inline -void chpl_gen_comm_put_ctl(wide_ptr_t dst, void *src_addr, int64_t n, int64_t ctl) +void chpl_gen_comm_put_ctl(c_nodeid_t dst_node, void* dst_addr, + void *src_addr, + uintptr_t n, int64_t ctl) { - c_nodeid_t dst_node = chpl_wide_ptr_get_node(dst); - void* dst_addr = chpl_wide_ptr_get_address(dst); chpl_gen_comm_put(src_addr, dst_node, dst_addr, sizeof(uint8_t)*n, CHPL_TYPE_uint8_t, CHPL_COMM_UNKNOWN_ID, -1, 0); } @@ -56,55 +56,50 @@ void chpl_gen_comm_put_ctl(wide_ptr_t dst, void *src_addr, int64_t n, int64_t ct // such code, but it could appear during optimization. Note that the // dst and src regions could overlap. 
static inline -void chpl_gen_comm_getput(wide_ptr_t dst, wide_ptr_t src, int64_t n) +void chpl_gen_comm_getput(c_nodeid_t dst_node, void* dst_addr, + c_nodeid_t src_node, void* src_addr, + uintptr_t n) { - c_nodeid_t src_node = chpl_wide_ptr_get_node(src); - void* src_addr = chpl_wide_ptr_get_address(src); - c_nodeid_t dst_node = chpl_wide_ptr_get_node(dst); - void* dst_addr = chpl_wide_ptr_get_address(dst); - if (chpl_nodeID == dst_node && chpl_nodeID == src_node) { memmove(dst_addr, src_addr, n); } else if( chpl_nodeID == dst_node ) { - chpl_gen_comm_get_ctl(dst_addr, src, n, 0); + chpl_gen_comm_get_ctl(dst_addr, src_node, src_addr, n, 0); } else if( chpl_nodeID == src_node ) { - chpl_gen_comm_put_ctl(dst, src_addr, n, 0); + chpl_gen_comm_put_ctl(dst_node, dst_addr, src_addr, n, 0); } else { char buf[1024]; - int64_t chunk; - int64_t i; + uintptr_t chunk; + uintptr_t i; for( i = 0; i < n; i += chunk) { chunk = n - i; if( chunk > sizeof(buf) ) chunk = sizeof(buf); - chpl_gen_comm_get_ctl(buf, src, chunk, 0); - chpl_gen_comm_put_ctl(dst, buf, chunk, 0); - src = chpl_return_wide_ptr_add(src, chunk); - dst = chpl_return_wide_ptr_add(dst, chunk); + chpl_gen_comm_get_ctl(buf, src_node, src_addr, chunk, 0); + chpl_gen_comm_put_ctl(dst_node, dst_addr, buf, chunk, 0); + src_addr = ((unsigned char*)src_addr) + chunk; + dst_addr = ((unsigned char*)dst_addr) + chunk; } } } static inline -void chpl_gen_comm_memset(wide_ptr_t dst, int8_t src, int64_t n) +void chpl_gen_comm_memset(c_nodeid_t dst_node, void* dst_addr, + int8_t src_byte, uintptr_t n) { - c_nodeid_t dst_node = chpl_wide_ptr_get_node(dst); - void* dst_addr = chpl_wide_ptr_get_address(dst); - if (chpl_nodeID == dst_node) { - memset(dst_addr, src, n); + memset(dst_addr, src_byte, n); } else { char buf[1024]; - int64_t chunk; - int64_t i; + uintptr_t chunk; + uintptr_t i; size_t max = sizeof(buf); if( n < max ) max = n; - memset(buf, src, max); + memset(buf, src_byte, max); for( i = 0; i < n; i += chunk) { chunk = n - 
i; if( chunk > sizeof(buf) ) chunk = sizeof(buf); - chpl_gen_comm_put_ctl(dst, buf, chunk, 0); - dst = chpl_return_wide_ptr_add(dst, chunk); + chpl_gen_comm_put_ctl(dst_node, dst_addr, buf, chunk, 0); + dst_addr = ((unsigned char*)dst_addr) + chunk; } } } diff --git a/runtime/include/chpl-wide-ptr-fns.h b/runtime/include/chpl-wide-ptr-fns.h index 7357adb6dc6d..cad7d69f2802 100644 --- a/runtime/include/chpl-wide-ptr-fns.h +++ b/runtime/include/chpl-wide-ptr-fns.h @@ -20,95 +20,41 @@ #ifndef _chpl_wide_ptr_impl_h_ #define _chpl_wide_ptr_impl_h_ -// Make sure that chpltypes will define a wide pointer type. -#ifdef CHPL_WIDE_POINTER_STRUCT -#else - -#ifdef CHPL_WIDE_POINTER_PACKED -#else -#error missing either CHPL_WIDE_POINTER_STRUCT or CHPL_WIDE_POINTER_PACKED -#endif - -#endif - - #include "chpltypes.h" // chpl_error needed for some of the packed wide pointer error cases. #include "error.h" -// Methods for working with wide pointers (packed or not) and localeID_t. +// Methods for working with wide pointers and localeID_t. // There are several variants of many of these in order to simplify // different code generation tasks. The versions that take in a pointer // to a wide_ptr_t generally do so in order to easily be used with // a variety of wide pointer types with the structure representation, // since one can't cast a structure... 
-#ifdef CHPL_WIDE_POINTER_PACKED - #define CHPL_WIDE_PTR_MARK_BITS 1 - #define CHPL_WIDE_PTR_MARK 1 - - #define CHPL_PTR_BITS (64-CHPL_WIDE_PTR_MARK_BITS-CHPL_WIDE_POINTER_NODE_BITS) - - #define CHPL_NODE_MASK ( (1ULL << CHPL_WIDE_POINTER_NODE_BITS) - 1ULL) - #define CHPL_PTR_MASK ( (1ULL << CHPL_PTR_BITS) - 1ULL) -#endif - static inline wide_ptr_t chpl_return_wide_ptr_node(c_nodeid_t node, void* addr) { -#ifndef CHPL_WIDE_POINTER_PACKED wide_ptr_t dst; dst.locale = chpl_rt_buildLocaleID(node, 0); dst.addr = addr; return dst; -#else - uint64_t uptr = (uint64_t) addr; - uint64_t unode = node; - uint64_t mark = CHPL_WIDE_PTR_MARK; - uint64_t ret; - if( (uptr & CHPL_PTR_MASK) != uptr ) { - chpl_internal_error("Local pointer too big to fit into wide pointer"); - } - if( (unode & CHPL_NODE_MASK) != unode) { - chpl_internal_error("Node ID too big to fit into wide pointer"); - } - ret = uptr; - ret |= unode << CHPL_PTR_BITS; - ret |= mark << (CHPL_PTR_BITS+CHPL_WIDE_POINTER_NODE_BITS); - return (wide_ptr_t) ret; -#endif } static inline void chpl_check_wide_ptr(wide_ptr_t ptr) { -#ifndef CHPL_WIDE_POINTER_PACKED - if( chpl_rt_nodeFromLocaleID(ptr.locale) < 0 ) - chpl_internal_error("Bad wide pointer"); -#else - uint64_t uptr = (uint64_t) ptr; - uint64_t mark = uptr >> (CHPL_PTR_BITS+CHPL_WIDE_POINTER_NODE_BITS); - if( uptr == 0 ) return; // NULL is always OK - if( mark != CHPL_WIDE_PTR_MARK ) - chpl_internal_error("Bad wide pointer"); -#endif + //if( chpl_rt_nodeFromLocaleID(ptr.locale) < 0 ) + // chpl_internal_error("Bad wide pointer"); } static inline wide_ptr_t chpl_return_wide_ptr_loc(chpl_localeID_t loc, void * addr) { -#ifndef CHPL_WIDE_POINTER_PACKED wide_ptr_t dst; dst.locale = loc; dst.addr = addr; return dst; -#else - // packed wide pointers do not store sublocale, - // so we just throw that info away - // and reconstruct it later from the local address. 
- return chpl_return_wide_ptr_node(chpl_rt_nodeFromLocaleID(loc), addr); -#endif } static inline @@ -121,28 +67,15 @@ wide_ptr_t chpl_return_wide_ptr_loc_ptr(const chpl_localeID_t* loc, void * addr) static inline c_nodeid_t chpl_wide_ptr_get_node(wide_ptr_t ptr) { -#ifndef CHPL_WIDE_POINTER_PACKED chpl_check_wide_ptr(ptr); return chpl_rt_nodeFromLocaleID(ptr.locale); -#else - uint64_t uptr = (uint64_t) ptr; - chpl_check_wide_ptr(ptr); - return (uptr >> CHPL_PTR_BITS) & CHPL_NODE_MASK; -#endif } static inline void* chpl_wide_ptr_get_address(wide_ptr_t ptr) { -#ifndef CHPL_WIDE_POINTER_PACKED chpl_check_wide_ptr(ptr); return ptr.addr; -#else - uint64_t uptr = (uint64_t) ptr; - chpl_check_wide_ptr(ptr); - uptr &= CHPL_PTR_MASK; - return (void*) uptr; -#endif } static inline @@ -150,12 +83,7 @@ chpl_localeID_t chpl_wide_ptr_get_localeID(wide_ptr_t ptr) { chpl_localeID_t loc; chpl_check_wide_ptr(ptr); -#ifndef CHPL_WIDE_POINTER_PACKED loc = ptr.locale; -#else - // packed wide pointers do not store sublocale - loc = chpl_rt_buildLocaleID(chpl_wide_ptr_get_node(ptr), 0); -#endif return loc; } @@ -170,12 +98,8 @@ void chpl_wide_ptr_read_localeID(wide_ptr_t ptr, static inline wide_ptr_t chpl_return_wide_ptr_add(wide_ptr_t ptr, size_t amt) { -#ifndef CHPL_WIDE_POINTER_PACKED ptr.addr = (void*) (((unsigned char*)ptr.addr) + amt); return ptr; -#else - return (wide_ptr_t) (((unsigned char*)ptr) + amt); -#endif } diff --git a/runtime/include/chplcgfns.h b/runtime/include/chplcgfns.h index 23659f577409..7cc3389d0d38 100644 --- a/runtime/include/chplcgfns.h +++ b/runtime/include/chplcgfns.h @@ -62,7 +62,6 @@ extern const char* CHPL_NETWORK_ATOMICS; extern const char* CHPL_GMP; extern const char* CHPL_HWLOC; extern const char* CHPL_REGEXP; -extern const char* CHPL_WIDE_POINTERS; extern const char* CHPL_LLVM; extern const char* CHPL_AUX_FILESYS; extern const char* CHPL_UNWIND; diff --git a/runtime/include/chpltypes.h b/runtime/include/chpltypes.h index feb567a16f69..df7886903f56 
100644 --- a/runtime/include/chpltypes.h +++ b/runtime/include/chpltypes.h @@ -135,24 +135,12 @@ static inline int isActualSublocID(c_sublocid_t subloc) { // include chpl-locale-model.h. (note: moving it out of the #ifdef leads to // problems building the launcher). -#ifdef CHPL_WIDE_POINTER_STRUCT #include "chpl-locale-model.h" typedef struct wide_ptr_s { chpl_localeID_t locale; void* addr; } wide_ptr_t; typedef wide_ptr_t* ptr_wide_ptr_t; -#else -// It's useful to have the type for a wide pointer-to-void. -// This is the packed pointer version (the other version would be -// {{node,subloc}, address}). -#ifdef CHPL_WIDE_POINTER_PACKED -#include "chpl-locale-model.h" -typedef void * wide_ptr_t; -typedef wide_ptr_t* ptr_wide_ptr_t; -#ifndef CHPL_WIDE_POINTER_NODE_BITS -#error Missing packed wide pointer definition CHPL_WIDE_POINTER_NODE_BITS -#endif #else // Just don't define wide_ptr_t. That way, other programs @@ -164,10 +152,6 @@ typedef wide_ptr_t* ptr_wide_ptr_t; // builds using chpl-comm.h (which uses that type to declare the // global variables registry), can continue to work. 
typedef void* ptr_wide_ptr_t; -#endif - -#endif - #endif // LAUNCHER #define nil 0 diff --git a/runtime/include/qio/sys.h b/runtime/include/qio/sys.h index 56b55645ecd3..ff8cf89c1770 100644 --- a/runtime/include/qio/sys.h +++ b/runtime/include/qio/sys.h @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -39,6 +40,9 @@ extern "C" { #endif +// This function just returns errno (needed for LLVM compiles) +static inline int chpl_macro_int_errno(void) { return errno; } + #ifndef LUSTRE_SUPER_MAGIC // Magic value to be found in the statfs man page #define LUSTRE_SUPER_MAGIC 0x0BD00BD0 diff --git a/test/chplenv/chplconfig/correctness/chplconfig b/test/chplenv/chplconfig/correctness/chplconfig index 29eaa40f256f..df6b6c35d4a6 100644 --- a/test/chplenv/chplconfig/correctness/chplconfig +++ b/test/chplenv/chplconfig/correctness/chplconfig @@ -20,6 +20,5 @@ CHPL_NETWORK_ATOMICS=none CHPL_GMP=none CHPL_HWLOC=hwloc CHPL_REGEXP=re2 -CHPL_WIDE_POINTERS=struct CHPL_LLVM=none CHPL_AUX_FILESYS=none diff --git a/test/chplenv/chplconfig/correctness/correctness.chpl b/test/chplenv/chplconfig/correctness/correctness.chpl index c9a498e9c23d..388bc7960518 100644 --- a/test/chplenv/chplconfig/correctness/correctness.chpl +++ b/test/chplenv/chplconfig/correctness/correctness.chpl @@ -22,5 +22,4 @@ writeln("CHPL_ATOMICS: intrinsics +"); writeln("CHPL_GMP: none +"); writeln("CHPL_HWLOC: hwloc +"); writeln("CHPL_REGEXP: re2 +"); -writeln("CHPL_WIDE_POINTERS: struct +"); writeln("CHPL_AUX_FILESYS: none +"); diff --git a/test/chplenv/printchplenv/printchplenv.chpl b/test/chplenv/printchplenv/printchplenv.chpl index e7349bad0b7a..3b5cf1ff62ce 100644 --- a/test/chplenv/printchplenv/printchplenv.chpl +++ b/test/chplenv/printchplenv/printchplenv.chpl @@ -21,5 +21,4 @@ if CHPL_COMM != 'none' then writeln('CHPL_GMP: ', CHPL_GMP); writeln('CHPL_HWLOC: ', CHPL_HWLOC); writeln('CHPL_REGEXP: ', CHPL_REGEXP); -writeln('CHPL_WIDE_POINTERS: ', CHPL_WIDE_POINTERS); 
writeln('CHPL_AUX_FILESYS: ', CHPL_AUX_FILESYS); diff --git a/test/compflags/albrecht/chplenv/chplenv.chpl b/test/compflags/albrecht/chplenv/chplenv.chpl index 427efabdfed4..f05719447ba6 100644 --- a/test/compflags/albrecht/chplenv/chplenv.chpl +++ b/test/compflags/albrecht/chplenv/chplenv.chpl @@ -19,7 +19,6 @@ writeln("CHPL_NETWORK_ATOMICS=",CHPL_NETWORK_ATOMICS); writeln("CHPL_GMP=",CHPL_GMP); writeln("CHPL_HWLOC=",CHPL_HWLOC); writeln("CHPL_REGEXP=",CHPL_REGEXP); -writeln("CHPL_WIDE_POINTERS=",CHPL_WIDE_POINTERS); writeln("CHPL_LLVM=",CHPL_LLVM); writeln("CHPL_AUX_FILESYS=",CHPL_AUX_FILESYS); writeln("CHPL_HOME=",CHPL_HOME); diff --git a/test/compflags/bradc/help/userhelp.good b/test/compflags/bradc/help/userhelp.good index ceecc0978c9c..128cb1f226c6 100644 --- a/test/compflags/bradc/help/userhelp.good +++ b/test/compflags/bradc/help/userhelp.good @@ -156,7 +156,6 @@ Compiler Configuration Options: --target-platform Platform for cross-compilation --tasks Specify tasking implementation --timers Specify timer implementation - --wide-pointers Specify wide pointer format Compiler Information Options: --copyright Show copyright diff --git a/test/functions/iterators/angeles/COMPOPTS b/test/functions/iterators/angeles/COMPOPTS deleted file mode 100644 index 0545c44dc8b0..000000000000 --- a/test/functions/iterators/angeles/COMPOPTS +++ /dev/null @@ -1 +0,0 @@ ---ccflags -D_BSD_SOURCE diff --git a/test/llvm/function-args/ref_nonnull.chpl b/test/llvm/function-args/ref_nonnull.chpl index 2d5d8e686243..6bfb84e19684 100644 --- a/test/llvm/function-args/ref_nonnull.chpl +++ b/test/llvm/function-args/ref_nonnull.chpl @@ -1,7 +1,12 @@ // This test checks whether nonnull attributes are generated for // attributes that have ref intent -//CHECK: i64 @f_chpl(i64* nonnull %x_chpl, i64* nonnull %y_chpl, i64 %z_chpl) +//CHECK: i64 @f_chpl( +//CHECK: i64* nonnull +//CHECK: i64* nonnull +//CHECK: i64 +//CHECK-NOT: nonnull +//CHECK: ) proc f(ref x : int, const ref y : int, z : int) { 
diff --git a/test/llvm/llvm-invariant/function_local_const.chpl b/test/llvm/llvm-invariant/function_local_const.chpl index e7747da8f060..bd7120b54ad0 100644 --- a/test/llvm/llvm-invariant/function_local_const.chpl +++ b/test/llvm/llvm-invariant/function_local_const.chpl @@ -7,11 +7,14 @@ record A var a : int; } - -// CHECK: store %A_chpl %{{[0-9]+}}, %A_chpl* %localConst_chpl -// CHECK-NEXT: %[[REG1:[0-9]+]] = bitcast %A_chpl* %localConst_chpl to i8* -// CHECK-NEXT: %{{[0-9]+}} = call {}* @llvm.invariant.start.p0i8(i64 8, i8* %[[REG1]]) - +// CHECK: @_construct_A_chpl +// CHECK-DAG: call {}* @llvm.invariant.start.p0i8(i64 8, i8* [[CAST:%.*]]) +// CHECK-DAG: [[CAST]] = bitcast %A_chpl* [[PTR:%.*]] to i8* +// CHECK_DAG: store %A_chpl {{%.*}}, %A_chpl* [[PTR]] +// CHECK: getelementptr +// CHECK-SAME:[[PTR]] +// CHECK: load +// CHECK: ret proc f(n) { const localConst = new A(n*10); diff --git a/test/llvm/llvm-invariant/program_constructs.chpl b/test/llvm/llvm-invariant/program_constructs.chpl index ecf314f2afdb..7a6f51efd767 100644 --- a/test/llvm/llvm-invariant/program_constructs.chpl +++ b/test/llvm/llvm-invariant/program_constructs.chpl @@ -11,24 +11,33 @@ proc f(n) var sum = 0; for i in 1..10 { -// CHECK: store %A_chpl %{{[0-9]+}}, %A_chpl* %localConst_chpl -// CHECK-NEXT: %[[REG1:[0-9]+]] = bitcast %A_chpl* %localConst_chpl to i8* -// CHECK-NEXT: %{{[0-9]+}} = call {}* @llvm.invariant.start.p0i8(i64 8, i8* %[[REG1]]) +// CHECK: @_construct_A_chpl +// CHECK-DAG: call {}* @llvm.invariant.start.p0i8(i64 8, i8* [[CAST1:%.*]]) +// CHECK-DAG: [[CAST1]] = bitcast %A_chpl* [[PTR1:%.*]] to i8* +// CHECK_DAG: store %A_chpl {{%.*}}, %A_chpl* [[PTR1]] +// CHECK: getelementptr +// CHECK-SAME:[[PTR1]] const localConst = new A(i*10); sum += localConst.a; } if n < 10 { -// CHECK: store %A_chpl %{{[0-9]+}}, %A_chpl* %localConst_chpl2 -// CHECK-NEXT: %[[REG2:[0-9]+]] = bitcast %A_chpl* %localConst_chpl2 to i8* -// CHECK-NEXT: %{{[0-9]+}} = call {}* @llvm.invariant.start.p0i8(i64 8, 
i8* %[[REG2]]) +// CHECK: @_construct_A_chpl +// CHECK-DAG: call {}* @llvm.invariant.start.p0i8(i64 8, i8* [[CAST2:%.*]]) +// CHECK-DAG: [[CAST2]] = bitcast %A_chpl* [[PTR2:%.*]] to i8* +// CHECK_DAG: store %A_chpl {{%.*}}, %A_chpl* [[PTR2]] +// CHECK: getelementptr +// CHECK-SAME:[[PTR2]] const localConst = new A(n*10); return localConst.a; } else { -// CHECK: store %A_chpl %{{[0-9]+}}, %A_chpl* %localConst_chpl3 -// CHECK-NEXT: %[[REG3:[0-9]+]] = bitcast %A_chpl* %localConst_chpl3 to i8* -// CHECK-NEXT: %{{[0-9]+}} = call {}* @llvm.invariant.start.p0i8(i64 8, i8* %[[REG3]]) +// CHECK: @_construct_A_chpl +// CHECK-DAG: call {}* @llvm.invariant.start.p0i8(i64 8, i8* [[CAST3:%.*]]) +// CHECK-DAG: [[CAST3]] = bitcast %A_chpl* [[PTR3:%.*]] to i8* +// CHECK_DAG: store %A_chpl {{%.*}}, %A_chpl* [[PTR3]] +// CHECK: getelementptr +// CHECK-SAME:[[PTR3]] const localConst = new A(n*5); return localConst.a; } diff --git a/test/performance/ferguson/MYCOMPOPTS b/test/performance/ferguson/MYCOMPOPTS index dbd52ea6f33f..3fcc93ef1255 100755 --- a/test/performance/ferguson/MYCOMPOPTS +++ b/test/performance/ferguson/MYCOMPOPTS @@ -35,15 +35,12 @@ f('--no-llvm --no-cache-remote ' + checkMaxAttained, 'c') # C backend, cache remote f('--no-llvm --cache-remote', 'c-cache') -#if do_llvm: +if do_llvm: # LLVM backend, no cache remote -# f('--llvm --no-cache-remote', 'llvm') + #f('--llvm --no-cache-remote', 'llvm') # LLVM backend, cache remote -# print(basecompopts + '--llvm --cache-remote' + perfkeys) -# f('--llvm --cache-remote', 'llvm-cache') + #f('--llvm --cache-remote', 'llvm-cache') # LLVM backend, llvm wide opts, no cache remote -## print(basecompopts + '--llvm --llvm-wide-opt --no-cache-remote') -# f('--llvm --llvm-wide-opt --no-cache-remote', 'llvm-wide-opt') + f('--llvm --llvm-wide-opt --no-cache-remote', 'llvm-wide-opt') # LLVM backend, llvm wide opts, cache remote -## print(basecompopts + '--llvm --llvm-wide-opt --cache-remote') -# f('--llvm --llvm-wide-opt --cache-remote', 
'llvm-wide-opt-cache') + f('--llvm --llvm-wide-opt --cache-remote', 'llvm-wide-opt-cache') diff --git a/test/performance/ferguson/remote-record-read-licm.cc-compopts b/test/performance/ferguson/remote-record-read-licm.cc-compopts new file mode 120000 index 000000000000..e6d0994923f5 --- /dev/null +++ b/test/performance/ferguson/remote-record-read-licm.cc-compopts @@ -0,0 +1 @@ +MYCOMPOPTS \ No newline at end of file diff --git a/test/performance/ferguson/remote-record-read-licm.cc-keys b/test/performance/ferguson/remote-record-read-licm.cc-keys new file mode 100644 index 000000000000..4985a08acdc2 --- /dev/null +++ b/test/performance/ferguson/remote-record-read-licm.cc-keys @@ -0,0 +1,2 @@ +GETs: +seconds elapsed: diff --git a/test/performance/ferguson/remote-record-read-licm.chpl b/test/performance/ferguson/remote-record-read-licm.chpl new file mode 100644 index 000000000000..64910be876ab --- /dev/null +++ b/test/performance/ferguson/remote-record-read-licm.chpl @@ -0,0 +1,31 @@ +use CommUtil; + +config const n = 100000; +record C { + var x:real; + var y:real; + var z:real; +} + +var c = new C(1.0,2.0,3.0); + +var totalToPrint = 0; + +start(); + + +on Locales[1] { + var sum = 0; + for x in 1..n { + // We really hope to see the access to c.x + // loop-invariant-code-motioned. 
+ sum += c.x:int; + } + totalToPrint = sum; +} + +stop(); + +writeln(totalToPrint); + +report(maxPuts=1, maxOns=1); diff --git a/test/performance/ferguson/remote-record-read-licm.compopts b/test/performance/ferguson/remote-record-read-licm.compopts new file mode 120000 index 000000000000..e6d0994923f5 --- /dev/null +++ b/test/performance/ferguson/remote-record-read-licm.compopts @@ -0,0 +1 @@ +MYCOMPOPTS \ No newline at end of file diff --git a/test/performance/ferguson/remote-record-read-licm.good b/test/performance/ferguson/remote-record-read-licm.good new file mode 100644 index 000000000000..f7393e847d34 --- /dev/null +++ b/test/performance/ferguson/remote-record-read-licm.good @@ -0,0 +1 @@ +100000 diff --git a/test/performance/ferguson/remote-record-read-licm.graph b/test/performance/ferguson/remote-record-read-licm.graph new file mode 100644 index 000000000000..60a20569ddb5 --- /dev/null +++ b/test/performance/ferguson/remote-record-read-licm.graph @@ -0,0 +1,5 @@ +perfkeys: GETs:, GETs: +files: remote-record-read-licm-c.dat, remote-record-read-licm-c-cache.dat +graphkeys: c GETs, c-cache GETs +ylabel: Count +graphtitle: remote-record-read-licm diff --git a/third-party/llvm/Makefile b/third-party/llvm/Makefile index f43b4ae93e02..29e7112e2c72 100644 --- a/third-party/llvm/Makefile +++ b/third-party/llvm/Makefile @@ -100,12 +100,15 @@ $(LLVM_HEADER_FILE): # if cmake us also using Make. $(LLVM_SUPPORT_FILE): if [ -f $(LLVM_CONFIGURE_FILE) ]; then \ + cd $(LLVM_BUILD_DIR) && $(MAKE) install-cmake-exports ; \ cd $(LLVM_BUILD_DIR)/lib/Support && $(MAKE) ; \ cd $(LLVM_BUILD_DIR)/lib/Support && $(MAKE) install ; \ - elif [ -f $(LLVM_BUILD_DIR)/Makefile ]; then \ + elif [ -f $(LLVM_BUILD_DIR)/Makefile ]; then \ + cd $(LLVM_BUILD_DIR) && $(MAKE) install-cmake-exports ; \ cd $(LLVM_BUILD_DIR) && $(MAKE) LLVMSupport ; \ cd $(LLVM_BUILD_DIR) && $(MAKE) install-LLVMSupport ; \ else \ + cd $(LLVM_BUILD_DIR) && cmake --build . 
--target install-cmake-exports ; \ cd $(LLVM_BUILD_DIR) && cmake --build . --target LLVMSupport ; \ cd $(LLVM_BUILD_DIR) && cmake --build . --target install-LLVMSupport ; \ fi diff --git a/third-party/llvm/README b/third-party/llvm/README index 1a2a4f21763d..2f7be48ee6c2 100644 --- a/third-party/llvm/README +++ b/third-party/llvm/README @@ -11,4 +11,4 @@ please refer to $CHPL_HOME/doc/technotes/llvm.rst. For more information about LLVM itself, please refer to the website above or to the README in the llvm/ subdirectory of this directory. -The version of LLVM used here is 3.6. +The version of LLVM used here is 4.0 with two patches. diff --git a/third-party/llvm/llvm-4.0.1-BasicAliasAnalysis-patch.txt b/third-party/llvm/llvm-4.0.1-BasicAliasAnalysis-patch.txt new file mode 100644 index 000000000000..8c69a4056ff3 --- /dev/null +++ b/third-party/llvm/llvm-4.0.1-BasicAliasAnalysis-patch.txt @@ -0,0 +1,18 @@ +--- llvm/lib/Analysis/BasicAliasAnalysis.cpp 2017-01-27 11:16:33.000000000 -0500 ++++ ../BasicAliasAnalysis.cpp 2017-09-24 08:14:45.995366393 -0400 +@@ -340,9 +340,12 @@ + /// particular for 32b programs with negative indices that rely on two's + /// complement wrap-arounds for precise alias information. 
+ static int64_t adjustToPointerSize(int64_t Offset, unsigned PointerSize) { +- assert(PointerSize <= 64 && "Invalid PointerSize!"); +- unsigned ShiftBits = 64 - PointerSize; +- return (int64_t)((uint64_t)Offset << ShiftBits) >> ShiftBits; ++ if (PointerSize < 64) { ++ unsigned ShiftBits = 64 - PointerSize; ++ return (int64_t)((uint64_t)Offset << ShiftBits) >> ShiftBits; ++ } else { ++ return Offset; ++ } + } + + /// If V is a symbolic pointer expression, decompose it into a base pointer diff --git a/third-party/llvm/llvm-4.0.1-ValueTracking-patch.txt b/third-party/llvm/llvm-4.0.1-ValueTracking-patch.txt new file mode 100644 index 000000000000..e4059f7f973b --- /dev/null +++ b/third-party/llvm/llvm-4.0.1-ValueTracking-patch.txt @@ -0,0 +1,11 @@ +--- llvm/lib/Analysis/ValueTracking.cpp ++++ llvm/lib/Analysis/ValueTracking.cpp +@@ -3064,7 +3064,7 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, + if (!GEP->accumulateConstantOffset(DL, GEPOffset)) + break; + +- ByteOffset += GEPOffset.getSExtValue(); ++ ByteOffset += GEPOffset.sextOrTrunc(ByteOffset.getBitWidth()); + + Ptr = GEP->getPointerOperand(); + } else if (Operator::getOpcode(Ptr) == Instruction::BitCast || diff --git a/third-party/llvm/unpack-llvm.sh b/third-party/llvm/unpack-llvm.sh index 6755d8bf6495..a4005e97ae02 100755 --- a/third-party/llvm/unpack-llvm.sh +++ b/third-party/llvm/unpack-llvm.sh @@ -24,6 +24,8 @@ tar xf llvm-4.0.1.src.tar.xz tar xf cfe-4.0.1.src.tar.xz mv llvm-4.0.1.src llvm mv cfe-4.0.1.src llvm/tools/clang +patch -p0 < llvm-4.0.1-BasicAliasAnalysis-patch.txt +patch -p0 < llvm-4.0.1-ValueTracking-patch.txt #tar xf llvm-3.9.1.src.tar.xz #tar xf cfe-3.9.1.src.tar.xz diff --git a/util/printchplenv b/util/printchplenv index 48286b8397fb..c22d4baa30a5 100755 --- a/util/printchplenv +++ b/util/printchplenv @@ -121,12 +121,6 @@ def print_mode(mode='list', anonymize=False): regexp = chpl_regexp.get() print_var('CHPL_REGEXP', regexp, mode, 're', ('runtime',)) - 
wide_pointers = chpl_wide_pointers.get() - print_var('CHPL_WIDE_POINTERS', wide_pointers, mode, 'wide') - if mode == 'make': - wide_pointer_defines = chpl_wide_pointers.get('define') - print_var('CHPL_WIDE_POINTERS_DEFINES', wide_pointer_defines, mode, 'widedef') - if mode != 'list': llvm = chpl_llvm.get() print_var('CHPL_LLVM', llvm, mode, 'llvm', ('compiler',))