| //===-- OpenACC.cpp -- OpenACC directive lowering -------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "flang/Lower/OpenACC.h" |
| |
| #include "flang/Common/idioms.h" |
| #include "flang/Lower/Bridge.h" |
| #include "flang/Lower/ConvertType.h" |
| #include "flang/Lower/DirectivesCommon.h" |
| #include "flang/Lower/Mangler.h" |
| #include "flang/Lower/PFTBuilder.h" |
| #include "flang/Lower/StatementContext.h" |
| #include "flang/Lower/Support/Utils.h" |
| #include "flang/Lower/SymbolMap.h" |
| #include "flang/Optimizer/Builder/BoxValue.h" |
| #include "flang/Optimizer/Builder/Complex.h" |
| #include "flang/Optimizer/Builder/FIRBuilder.h" |
| #include "flang/Optimizer/Builder/HLFIRTools.h" |
| #include "flang/Optimizer/Builder/IntrinsicCall.h" |
| #include "flang/Optimizer/Builder/Todo.h" |
| #include "flang/Optimizer/Dialect/FIRType.h" |
| #include "flang/Optimizer/OpenACC/Support/FIROpenACCUtils.h" |
| #include "flang/Parser/parse-tree-visitor.h" |
| #include "flang/Parser/parse-tree.h" |
| #include "flang/Parser/tools.h" |
| #include "flang/Semantics/expression.h" |
| #include "flang/Semantics/scope.h" |
| #include "flang/Semantics/tools.h" |
| #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" |
| #include "mlir/Dialect/OpenACC/OpenACCUtils.h" |
| #include "mlir/IR/IRMapping.h" |
| #include "mlir/IR/MLIRContext.h" |
| #include "mlir/Support/LLVM.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/ScopeExit.h" |
| #include "llvm/Frontend/OpenACC/ACC.h.inc" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| |
| #define DEBUG_TYPE "flang-lower-openacc" |
| |
| static llvm::cl::opt<bool> generateDefaultBounds( |
| "openacc-generate-default-bounds", |
| llvm::cl::desc("Whether to generate default bounds for arrays."), |
| llvm::cl::init(false)); |
| |
| static llvm::cl::opt<bool> strideIncludeLowerExtent( |
| "openacc-stride-include-lower-extent", |
| llvm::cl::desc( |
| "Whether to include the lower dimensions extents in the stride."), |
| llvm::cl::init(true)); |
| |
| static llvm::cl::opt<bool> lowerDoLoopToAccLoop( |
| "openacc-do-loop-to-acc-loop", |
| llvm::cl::desc("Whether to lower do loops as `acc.loop` operations."), |
| llvm::cl::init(true)); |
| |
| static llvm::cl::opt<bool> enableSymbolRemapping( |
| "openacc-remap-symbols", |
| llvm::cl::desc("Whether to remap symbols that appears in data clauses."), |
| llvm::cl::init(true)); |
| |
| static llvm::cl::opt<bool> enableDevicePtrRemap( |
| "openacc-remap-device-ptr-symbols", |
| llvm::cl::desc("sub-option of openacc-remap-symbols for deviceptr clause"), |
| llvm::cl::init(false)); |
| |
| // Special value for * passed in device_type or gang clauses. |
| static constexpr std::int64_t starCst = -1; |
| |
| static unsigned routineCounter = 0; |
| static constexpr llvm::StringRef accRoutinePrefix = "acc_routine_"; |
| static constexpr llvm::StringRef accPrivateInitName = "acc.private.init"; |
| static constexpr llvm::StringRef accReductionInitName = "acc.reduction.init"; |
| |
| static mlir::Location |
| genOperandLocation(Fortran::lower::AbstractConverter &converter, |
| const Fortran::parser::AccObject &accObject) { |
| mlir::Location loc = converter.genUnknownLocation(); |
| Fortran::common::visit( |
| Fortran::common::visitors{ |
| [&](const Fortran::parser::Designator &designator) { |
| loc = converter.genLocation(designator.source); |
| }, |
| [&](const Fortran::parser::Name &name) { |
| loc = converter.genLocation(name.source); |
| }}, |
| accObject.u); |
| return loc; |
| } |
| |
| static void addOperands(llvm::SmallVectorImpl<mlir::Value> &operands, |
| llvm::SmallVectorImpl<int32_t> &operandSegments, |
| llvm::ArrayRef<mlir::Value> clauseOperands) { |
| operands.append(clauseOperands.begin(), clauseOperands.end()); |
| operandSegments.push_back(clauseOperands.size()); |
| } |
| |
| static void addOperand(llvm::SmallVectorImpl<mlir::Value> &operands, |
| llvm::SmallVectorImpl<int32_t> &operandSegments, |
| const mlir::Value &clauseOperand) { |
| if (clauseOperand) { |
| operands.push_back(clauseOperand); |
| operandSegments.push_back(1); |
| } else { |
| operandSegments.push_back(0); |
| } |
| } |
| |
| template <typename Op> |
| static Op |
| createDataEntryOp(fir::FirOpBuilder &builder, mlir::Location loc, |
| mlir::Value baseAddr, std::stringstream &name, |
| mlir::SmallVector<mlir::Value> bounds, bool structured, |
| bool implicit, mlir::acc::DataClause dataClause, |
| mlir::Type retTy, llvm::ArrayRef<mlir::Value> async, |
| llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes, |
| llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes, |
| bool unwrapBoxAddr = false, mlir::Value isPresent = {}) { |
| mlir::Value varPtrPtr; |
| llvm::SmallVector<mlir::Value, 8> operands; |
| llvm::SmallVector<int32_t, 8> operandSegments; |
| |
| addOperand(operands, operandSegments, baseAddr); |
| addOperand(operands, operandSegments, varPtrPtr); |
| addOperands(operands, operandSegments, bounds); |
| addOperands(operands, operandSegments, async); |
| |
| Op op = Op::create(builder, loc, retTy, operands); |
| op.setNameAttr(builder.getStringAttr(name.str())); |
| op.setStructured(structured); |
| op.setImplicit(implicit); |
| op.setDataClause(dataClause); |
| if (auto pointerLikeTy = |
| mlir::dyn_cast<mlir::acc::PointerLikeType>(baseAddr.getType())) { |
| op.setVarType(pointerLikeTy.getElementType()); |
| } else { |
| assert(mlir::isa<mlir::acc::MappableType>(baseAddr.getType()) && |
| "expected mappable"); |
| op.setVarType(baseAddr.getType()); |
| } |
| |
| op->setAttr(Op::getOperandSegmentSizeAttr(), |
| builder.getDenseI32ArrayAttr(operandSegments)); |
| if (!asyncDeviceTypes.empty()) |
| op.setAsyncOperandsDeviceTypeAttr(builder.getArrayAttr(asyncDeviceTypes)); |
| if (!asyncOnlyDeviceTypes.empty()) |
| op.setAsyncOnlyAttr(builder.getArrayAttr(asyncOnlyDeviceTypes)); |
| return op; |
| } |
| |
| static void addDeclareAttr(fir::FirOpBuilder &builder, mlir::Operation *op, |
| mlir::acc::DataClause clause) { |
| if (!op) |
| return; |
| op->setAttr(mlir::acc::getDeclareAttrName(), |
| mlir::acc::DeclareAttr::get(builder.getContext(), |
| mlir::acc::DataClauseAttr::get( |
| builder.getContext(), clause))); |
| } |
| |
| static mlir::func::FuncOp |
| createDeclareFunc(mlir::OpBuilder &modBuilder, fir::FirOpBuilder &builder, |
| mlir::Location loc, llvm::StringRef funcName, |
| llvm::SmallVector<mlir::Type> argsTy = {}, |
| llvm::SmallVector<mlir::Location> locs = {}) { |
| auto funcTy = mlir::FunctionType::get(modBuilder.getContext(), argsTy, {}); |
| auto funcOp = mlir::func::FuncOp::create(modBuilder, loc, funcName, funcTy); |
| funcOp.setVisibility(mlir::SymbolTable::Visibility::Private); |
| builder.createBlock(&funcOp.getRegion(), funcOp.getRegion().end(), argsTy, |
| locs); |
| builder.setInsertionPointToEnd(&funcOp.getRegion().back()); |
| mlir::func::ReturnOp::create(builder, loc); |
| builder.setInsertionPointToStart(&funcOp.getRegion().back()); |
| return funcOp; |
| } |
| |
| template <typename Op> |
| static Op |
| createSimpleOp(fir::FirOpBuilder &builder, mlir::Location loc, |
| const llvm::SmallVectorImpl<mlir::Value> &operands, |
| const llvm::SmallVectorImpl<int32_t> &operandSegments) { |
| llvm::ArrayRef<mlir::Type> argTy; |
| Op op = Op::create(builder, loc, argTy, operands); |
| op->setAttr(Op::getOperandSegmentSizeAttr(), |
| builder.getDenseI32ArrayAttr(operandSegments)); |
| return op; |
| } |
| |
| template <typename EntryOp> |
| static void createDeclareAllocFuncWithArg(mlir::OpBuilder &modBuilder, |
| fir::FirOpBuilder &builder, |
| mlir::Location loc, mlir::Type descTy, |
| llvm::StringRef funcNamePrefix, |
| std::stringstream &asFortran, |
| mlir::acc::DataClause clause) { |
| auto crtInsPt = builder.saveInsertionPoint(); |
| std::stringstream registerFuncName; |
| registerFuncName << funcNamePrefix.str() |
| << Fortran::lower::declarePostAllocSuffix.str(); |
| |
| if (!mlir::isa<fir::ReferenceType>(descTy)) |
| descTy = fir::ReferenceType::get(descTy); |
| auto registerFuncOp = createDeclareFunc( |
| modBuilder, builder, loc, registerFuncName.str(), {descTy}, {loc}); |
| |
| llvm::SmallVector<mlir::Value> bounds; |
| std::stringstream asFortranDesc; |
| asFortranDesc << asFortran.str(); |
| // Start a structured region with declare_enter. |
| EntryOp descEntryOp = createDataEntryOp<EntryOp>( |
| builder, loc, registerFuncOp.getArgument(0), asFortranDesc, bounds, |
| /*structured=*/false, /*implicit=*/true, clause, descTy, |
| /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{}); |
| mlir::acc::DeclareEnterOp::create( |
| builder, loc, mlir::acc::DeclareTokenType::get(descEntryOp.getContext()), |
| mlir::ValueRange(descEntryOp.getAccVar())); |
| |
| modBuilder.setInsertionPointAfter(registerFuncOp); |
| builder.restoreInsertionPoint(crtInsPt); |
| } |
| |
| template <typename ExitOp> |
| static void createDeclareDeallocFuncWithArg( |
| mlir::OpBuilder &modBuilder, fir::FirOpBuilder &builder, mlir::Location loc, |
| mlir::Type descTy, llvm::StringRef funcNamePrefix, |
| std::stringstream &asFortran, mlir::acc::DataClause clause) { |
| auto crtInsPt = builder.saveInsertionPoint(); |
| // Generate the pre dealloc function. |
| std::stringstream preDeallocFuncName; |
| preDeallocFuncName << funcNamePrefix.str() |
| << Fortran::lower::declarePreDeallocSuffix.str(); |
| if (!mlir::isa<fir::ReferenceType>(descTy)) |
| descTy = fir::ReferenceType::get(descTy); |
| auto preDeallocOp = createDeclareFunc( |
| modBuilder, builder, loc, preDeallocFuncName.str(), {descTy}, {loc}); |
| |
| mlir::Value var = preDeallocOp.getArgument(0); |
| |
| llvm::SmallVector<mlir::Value> bounds; |
| mlir::acc::GetDevicePtrOp entryOp = |
| createDataEntryOp<mlir::acc::GetDevicePtrOp>( |
| builder, loc, var, asFortran, bounds, |
| /*structured=*/false, /*implicit=*/false, clause, var.getType(), |
| /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{}); |
| mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{}, |
| mlir::ValueRange(entryOp.getAccVar())); |
| |
| if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> || |
| std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>) |
| ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(), |
| entryOp.getVar(), entryOp.getVarType(), entryOp.getBounds(), |
| entryOp.getAsyncOperands(), |
| entryOp.getAsyncOperandsDeviceTypeAttr(), |
| entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(), |
| /*structured=*/false, /*implicit=*/false, |
| builder.getStringAttr(*entryOp.getName())); |
| else |
| ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(), |
| entryOp.getBounds(), entryOp.getAsyncOperands(), |
| entryOp.getAsyncOperandsDeviceTypeAttr(), |
| entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(), |
| /*structured=*/false, /*implicit=*/false, |
| builder.getStringAttr(*entryOp.getName())); |
| |
| // Generate the post dealloc function. |
| modBuilder.setInsertionPointAfter(preDeallocOp); |
| std::stringstream postDeallocFuncName; |
| postDeallocFuncName << funcNamePrefix.str() |
| << Fortran::lower::declarePostDeallocSuffix.str(); |
| auto postDeallocOp = createDeclareFunc( |
| modBuilder, builder, loc, postDeallocFuncName.str(), {descTy}, {loc}); |
| |
| var = postDeallocOp.getArgument(0); |
| // End structured region with declare_exit. |
| mlir::acc::GetDevicePtrOp postEntryOp = |
| createDataEntryOp<mlir::acc::GetDevicePtrOp>( |
| builder, loc, var, asFortran, bounds, |
| /*structured=*/false, /*implicit=*/true, clause, var.getType(), |
| /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{}); |
| mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{}, |
| mlir::ValueRange(postEntryOp.getAccVar())); |
| modBuilder.setInsertionPointAfter(postDeallocOp); |
| builder.restoreInsertionPoint(crtInsPt); |
| } |
| |
| Fortran::semantics::Symbol & |
| getSymbolFromAccObject(const Fortran::parser::AccObject &accObject) { |
| if (const auto *designator = |
| std::get_if<Fortran::parser::Designator>(&accObject.u)) { |
| if (const auto *name = |
| Fortran::parser::GetDesignatorNameIfDataRef(*designator)) |
| return *name->symbol; |
| if (const auto *arrayElement = |
| Fortran::parser::Unwrap<Fortran::parser::ArrayElement>( |
| *designator)) { |
| const Fortran::parser::Name &name = |
| Fortran::parser::GetLastName(arrayElement->base); |
| return *name.symbol; |
| } |
| if (const auto *component = |
| Fortran::parser::Unwrap<Fortran::parser::StructureComponent>( |
| *designator)) { |
| return *component->component.symbol; |
| } |
| } else if (const auto *name = |
| std::get_if<Fortran::parser::Name>(&accObject.u)) { |
| return *name->symbol; |
| } |
| llvm::report_fatal_error("Could not find symbol"); |
| } |
| |
| /// Used to generate atomic.read operation which is created in existing |
| /// location set by builder. |
| static inline void |
| genAtomicCaptureStatement(Fortran::lower::AbstractConverter &converter, |
| mlir::Value fromAddress, mlir::Value toAddress, |
| mlir::Type elementType, mlir::Location loc) { |
| // Generate `atomic.read` operation for atomic assignment statements |
| fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
| |
| mlir::acc::AtomicReadOp::create(firOpBuilder, loc, fromAddress, toAddress, |
| mlir::TypeAttr::get(elementType), |
| /*ifCond=*/mlir::Value{}); |
| } |
| |
| /// Used to generate atomic.write operation which is created in existing |
| /// location set by builder. |
| static inline void |
| genAtomicWriteStatement(Fortran::lower::AbstractConverter &converter, |
| mlir::Value lhsAddr, mlir::Value rhsExpr, |
| mlir::Location loc, |
| mlir::Value *evaluatedExprValue = nullptr) { |
| // Generate `atomic.write` operation for atomic assignment statements |
| fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
| |
| mlir::Type varType = fir::unwrapRefType(lhsAddr.getType()); |
| // Create a conversion outside the capture block. |
| auto insertionPoint = firOpBuilder.saveInsertionPoint(); |
| firOpBuilder.setInsertionPointAfter(rhsExpr.getDefiningOp()); |
| rhsExpr = firOpBuilder.createConvert(loc, varType, rhsExpr); |
| firOpBuilder.restoreInsertionPoint(insertionPoint); |
| |
| mlir::acc::AtomicWriteOp::create(firOpBuilder, loc, lhsAddr, rhsExpr, |
| /*ifCond=*/mlir::Value{}); |
| } |
| |
| /// Used to generate atomic.update operation which is created in existing |
| /// location set by builder. |
| static inline void genAtomicUpdateStatement( |
| Fortran::lower::AbstractConverter &converter, mlir::Value lhsAddr, |
| mlir::Type varType, const Fortran::parser::Variable &assignmentStmtVariable, |
| const Fortran::parser::Expr &assignmentStmtExpr, mlir::Location loc, |
| mlir::Operation *atomicCaptureOp = nullptr, |
| Fortran::lower::StatementContext *atomicCaptureStmtCtx = nullptr) { |
| // Generate `atomic.update` operation for atomic assignment statements |
| fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
| mlir::Location currentLocation = converter.getCurrentLocation(); |
| |
| // Create the omp.atomic.update or acc.atomic.update operation |
| // |
| // func.func @_QPsb() { |
| // %0 = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFsbEa"} |
| // %1 = fir.alloca i32 {bindc_name = "b", uniq_name = "_QFsbEb"} |
| // %2 = fir.load %1 : !fir.ref<i32> |
| // omp.atomic.update %0 : !fir.ref<i32> { |
| // ^bb0(%arg0: i32): |
| // %3 = arith.addi %arg0, %2 : i32 |
| // omp.yield(%3 : i32) |
| // } |
| // return |
| // } |
| |
| auto getArgExpression = |
| [](std::list<Fortran::parser::ActualArgSpec>::const_iterator it) { |
| const auto &arg{std::get<Fortran::parser::ActualArg>((*it).t)}; |
| const auto *parserExpr{ |
| std::get_if<Fortran::common::Indirection<Fortran::parser::Expr>>( |
| &arg.u)}; |
| return parserExpr; |
| }; |
| |
| // Lower any non atomic sub-expression before the atomic operation, and |
| // map its lowered value to the semantic representation. |
| Fortran::lower::ExprToValueMap exprValueOverrides; |
| // Max and min intrinsics can have a list of Args. Hence we need a list |
| // of nonAtomicSubExprs to hoist. Currently, only the load is hoisted. |
| llvm::SmallVector<const Fortran::lower::SomeExpr *> nonAtomicSubExprs; |
| Fortran::common::visit( |
| Fortran::common::visitors{ |
| [&](const Fortran::common::Indirection< |
| Fortran::parser::FunctionReference> &funcRef) -> void { |
| const auto &args{ |
| std::get<std::list<Fortran::parser::ActualArgSpec>>( |
| funcRef.value().v.t)}; |
| std::list<Fortran::parser::ActualArgSpec>::const_iterator beginIt = |
| args.begin(); |
| std::list<Fortran::parser::ActualArgSpec>::const_iterator endIt = |
| args.end(); |
| const auto *exprFirst{getArgExpression(beginIt)}; |
| if (exprFirst && exprFirst->value().source == |
| assignmentStmtVariable.GetSource()) { |
| // Add everything except the first |
| beginIt++; |
| } else { |
| // Add everything except the last |
| endIt--; |
| } |
| std::list<Fortran::parser::ActualArgSpec>::const_iterator it; |
| for (it = beginIt; it != endIt; it++) { |
| const Fortran::common::Indirection<Fortran::parser::Expr> *expr = |
| getArgExpression(it); |
| if (expr) |
| nonAtomicSubExprs.push_back(Fortran::semantics::GetExpr(*expr)); |
| } |
| }, |
| [&](const auto &op) -> void { |
| using T = std::decay_t<decltype(op)>; |
| if constexpr (std::is_base_of< |
| Fortran::parser::Expr::IntrinsicBinary, |
| T>::value) { |
| const auto &exprLeft{std::get<0>(op.t)}; |
| const auto &exprRight{std::get<1>(op.t)}; |
| if (exprLeft.value().source == assignmentStmtVariable.GetSource()) |
| nonAtomicSubExprs.push_back( |
| Fortran::semantics::GetExpr(exprRight)); |
| else |
| nonAtomicSubExprs.push_back( |
| Fortran::semantics::GetExpr(exprLeft)); |
| } |
| }, |
| }, |
| assignmentStmtExpr.u); |
| Fortran::lower::StatementContext nonAtomicStmtCtx; |
| Fortran::lower::StatementContext *stmtCtxPtr = &nonAtomicStmtCtx; |
| if (!nonAtomicSubExprs.empty()) { |
| // Generate non atomic part before all the atomic operations. |
| auto insertionPoint = firOpBuilder.saveInsertionPoint(); |
| if (atomicCaptureOp) { |
| assert(atomicCaptureStmtCtx && "must specify statement context"); |
| firOpBuilder.setInsertionPoint(atomicCaptureOp); |
| // Any clean-ups associated with the expression lowering |
| // must also be generated outside of the atomic update operation |
| // and after the atomic capture operation. |
| // The atomicCaptureStmtCtx will be finalized at the end |
| // of the atomic capture operation generation. |
| stmtCtxPtr = atomicCaptureStmtCtx; |
| } |
| mlir::Value nonAtomicVal; |
| for (auto *nonAtomicSubExpr : nonAtomicSubExprs) { |
| nonAtomicVal = fir::getBase(converter.genExprValue( |
| currentLocation, *nonAtomicSubExpr, *stmtCtxPtr)); |
| exprValueOverrides.try_emplace(nonAtomicSubExpr, nonAtomicVal); |
| } |
| if (atomicCaptureOp) |
| firOpBuilder.restoreInsertionPoint(insertionPoint); |
| } |
| |
| mlir::Operation *atomicUpdateOp = nullptr; |
| atomicUpdateOp = |
| mlir::acc::AtomicUpdateOp::create(firOpBuilder, currentLocation, lhsAddr, |
| /*ifCond=*/mlir::Value{}); |
| |
| llvm::SmallVector<mlir::Type> varTys = {varType}; |
| llvm::SmallVector<mlir::Location> locs = {currentLocation}; |
| firOpBuilder.createBlock(&atomicUpdateOp->getRegion(0), {}, varTys, locs); |
| mlir::Value val = |
| fir::getBase(atomicUpdateOp->getRegion(0).front().getArgument(0)); |
| |
| exprValueOverrides.try_emplace( |
| Fortran::semantics::GetExpr(assignmentStmtVariable), val); |
| { |
| // statement context inside the atomic block. |
| converter.overrideExprValues(&exprValueOverrides); |
| Fortran::lower::StatementContext atomicStmtCtx; |
| mlir::Value rhsExpr = fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(assignmentStmtExpr), atomicStmtCtx)); |
| mlir::Value convertResult = |
| firOpBuilder.createConvert(currentLocation, varType, rhsExpr); |
| mlir::acc::YieldOp::create(firOpBuilder, currentLocation, convertResult); |
| converter.resetExprOverrides(); |
| } |
| firOpBuilder.setInsertionPointAfter(atomicUpdateOp); |
| } |
| |
| /// Processes an atomic construct with write clause. |
| void genAtomicWrite(Fortran::lower::AbstractConverter &converter, |
| const Fortran::parser::AccAtomicWrite &atomicWrite, |
| mlir::Location loc) { |
| const Fortran::parser::AssignmentStmt &stmt = |
| std::get<Fortran::parser::Statement<Fortran::parser::AssignmentStmt>>( |
| atomicWrite.t) |
| .statement; |
| const Fortran::evaluate::Assignment &assign = *stmt.typedAssignment->v; |
| Fortran::lower::StatementContext stmtCtx; |
| // Get the value and address of atomic write operands. |
| mlir::Value rhsExpr = |
| fir::getBase(converter.genExprValue(assign.rhs, stmtCtx)); |
| mlir::Value lhsAddr = |
| fir::getBase(converter.genExprAddr(assign.lhs, stmtCtx)); |
| genAtomicWriteStatement(converter, lhsAddr, rhsExpr, loc); |
| } |
| |
| /// Processes an atomic construct with read clause. |
| void genAtomicRead(Fortran::lower::AbstractConverter &converter, |
| const Fortran::parser::AccAtomicRead &atomicRead, |
| mlir::Location loc) { |
| const auto &assignmentStmtExpr = std::get<Fortran::parser::Expr>( |
| std::get<Fortran::parser::Statement<Fortran::parser::AssignmentStmt>>( |
| atomicRead.t) |
| .statement.t); |
| const auto &assignmentStmtVariable = std::get<Fortran::parser::Variable>( |
| std::get<Fortran::parser::Statement<Fortran::parser::AssignmentStmt>>( |
| atomicRead.t) |
| .statement.t); |
| |
| Fortran::lower::StatementContext stmtCtx; |
| const Fortran::semantics::SomeExpr &fromExpr = |
| *Fortran::semantics::GetExpr(assignmentStmtExpr); |
| mlir::Type elementType = converter.genType(fromExpr); |
| mlir::Value fromAddress = |
| fir::getBase(converter.genExprAddr(fromExpr, stmtCtx)); |
| mlir::Value toAddress = fir::getBase(converter.genExprAddr( |
| *Fortran::semantics::GetExpr(assignmentStmtVariable), stmtCtx)); |
| genAtomicCaptureStatement(converter, fromAddress, toAddress, elementType, |
| loc); |
| } |
| |
| /// Processes an atomic construct with update clause. |
| void genAtomicUpdate(Fortran::lower::AbstractConverter &converter, |
| const Fortran::parser::AccAtomicUpdate &atomicUpdate, |
| mlir::Location loc) { |
| const auto &assignmentStmtExpr = std::get<Fortran::parser::Expr>( |
| std::get<Fortran::parser::Statement<Fortran::parser::AssignmentStmt>>( |
| atomicUpdate.t) |
| .statement.t); |
| const auto &assignmentStmtVariable = std::get<Fortran::parser::Variable>( |
| std::get<Fortran::parser::Statement<Fortran::parser::AssignmentStmt>>( |
| atomicUpdate.t) |
| .statement.t); |
| |
| Fortran::lower::StatementContext stmtCtx; |
| mlir::Value lhsAddr = fir::getBase(converter.genExprAddr( |
| *Fortran::semantics::GetExpr(assignmentStmtVariable), stmtCtx)); |
| mlir::Type varType = fir::unwrapRefType(lhsAddr.getType()); |
| genAtomicUpdateStatement(converter, lhsAddr, varType, assignmentStmtVariable, |
| assignmentStmtExpr, loc); |
| } |
| |
| /// Processes an atomic construct with capture clause. |
| void genAtomicCapture(Fortran::lower::AbstractConverter &converter, |
| const Fortran::parser::AccAtomicCapture &atomicCapture, |
| mlir::Location loc) { |
| fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
| |
| const Fortran::parser::AssignmentStmt &stmt1 = |
| std::get<Fortran::parser::AccAtomicCapture::Stmt1>(atomicCapture.t) |
| .v.statement; |
| const Fortran::evaluate::Assignment &assign1 = *stmt1.typedAssignment->v; |
| const auto &stmt1Var{std::get<Fortran::parser::Variable>(stmt1.t)}; |
| const auto &stmt1Expr{std::get<Fortran::parser::Expr>(stmt1.t)}; |
| const Fortran::parser::AssignmentStmt &stmt2 = |
| std::get<Fortran::parser::AccAtomicCapture::Stmt2>(atomicCapture.t) |
| .v.statement; |
| const Fortran::evaluate::Assignment &assign2 = *stmt2.typedAssignment->v; |
| const auto &stmt2Var{std::get<Fortran::parser::Variable>(stmt2.t)}; |
| const auto &stmt2Expr{std::get<Fortran::parser::Expr>(stmt2.t)}; |
| |
| // Pre-evaluate expressions to be used in the various operations inside |
| // `atomic.capture` since it is not desirable to have anything other than |
| // a `atomic.read`, `atomic.write`, or `atomic.update` operation |
| // inside `atomic.capture` |
| Fortran::lower::StatementContext stmtCtx; |
| // LHS evaluations are common to all combinations of `atomic.capture` |
| mlir::Value stmt1LHSArg = |
| fir::getBase(converter.genExprAddr(assign1.lhs, stmtCtx)); |
| mlir::Value stmt2LHSArg = |
| fir::getBase(converter.genExprAddr(assign2.lhs, stmtCtx)); |
| |
| // Type information used in generation of `atomic.update` operation |
| mlir::Type stmt1VarType = |
| fir::getBase(converter.genExprValue(assign1.lhs, stmtCtx)).getType(); |
| mlir::Type stmt2VarType = |
| fir::getBase(converter.genExprValue(assign2.lhs, stmtCtx)).getType(); |
| |
| mlir::Operation *atomicCaptureOp = nullptr; |
| atomicCaptureOp = |
| mlir::acc::AtomicCaptureOp::create(firOpBuilder, loc, |
| /*ifCond=*/mlir::Value{}); |
| |
| firOpBuilder.createBlock(&(atomicCaptureOp->getRegion(0))); |
| mlir::Block &block = atomicCaptureOp->getRegion(0).back(); |
| firOpBuilder.setInsertionPointToStart(&block); |
| if (Fortran::parser::CheckForSingleVariableOnRHS(stmt1)) { |
| if (Fortran::evaluate::CheckForSymbolMatch( |
| Fortran::semantics::GetExpr(stmt2Var), |
| Fortran::semantics::GetExpr(stmt2Expr))) { |
| // Atomic capture construct is of the form [capture-stmt, update-stmt] |
| const Fortran::semantics::SomeExpr &fromExpr = |
| *Fortran::semantics::GetExpr(stmt1Expr); |
| mlir::Type elementType = converter.genType(fromExpr); |
| genAtomicCaptureStatement(converter, stmt2LHSArg, stmt1LHSArg, |
| elementType, loc); |
| genAtomicUpdateStatement(converter, stmt2LHSArg, stmt2VarType, stmt2Var, |
| stmt2Expr, loc, atomicCaptureOp, &stmtCtx); |
| } else { |
| // Atomic capture construct is of the form [capture-stmt, write-stmt] |
| firOpBuilder.setInsertionPoint(atomicCaptureOp); |
| mlir::Value stmt2RHSArg = |
| fir::getBase(converter.genExprValue(assign2.rhs, stmtCtx)); |
| firOpBuilder.setInsertionPointToStart(&block); |
| const Fortran::semantics::SomeExpr &fromExpr = |
| *Fortran::semantics::GetExpr(stmt1Expr); |
| mlir::Type elementType = converter.genType(fromExpr); |
| genAtomicCaptureStatement(converter, stmt2LHSArg, stmt1LHSArg, |
| elementType, loc); |
| genAtomicWriteStatement(converter, stmt2LHSArg, stmt2RHSArg, loc); |
| } |
| } else { |
| // Atomic capture construct is of the form [update-stmt, capture-stmt] |
| const Fortran::semantics::SomeExpr &fromExpr = |
| *Fortran::semantics::GetExpr(stmt2Expr); |
| mlir::Type elementType = converter.genType(fromExpr); |
| genAtomicUpdateStatement(converter, stmt1LHSArg, stmt1VarType, stmt1Var, |
| stmt1Expr, loc, atomicCaptureOp, &stmtCtx); |
| genAtomicCaptureStatement(converter, stmt1LHSArg, stmt2LHSArg, elementType, |
| loc); |
| } |
| firOpBuilder.setInsertionPointToEnd(&block); |
| mlir::acc::TerminatorOp::create(firOpBuilder, loc); |
| // The clean-ups associated with the statements inside the capture |
| // construct must be generated after the AtomicCaptureOp. |
| firOpBuilder.setInsertionPointAfter(atomicCaptureOp); |
| } |
| |
| /// Rebuild the array type from the acc.bounds operation with constant |
| /// lowerbound/upperbound or extent. |
| static mlir::Type getTypeFromBounds(llvm::SmallVector<mlir::Value> &bounds, |
| mlir::Type ty) { |
| auto seqTy = |
| mlir::dyn_cast_or_null<fir::SequenceType>(fir::unwrapRefType(ty)); |
| if (!bounds.empty() && seqTy) { |
| llvm::SmallVector<int64_t> shape; |
| for (auto b : bounds) { |
| auto boundsOp = |
| mlir::dyn_cast<mlir::acc::DataBoundsOp>(b.getDefiningOp()); |
| if (boundsOp.getLowerbound() && |
| fir::getIntIfConstant(boundsOp.getLowerbound()) && |
| boundsOp.getUpperbound() && |
| fir::getIntIfConstant(boundsOp.getUpperbound())) { |
| int64_t ext = *fir::getIntIfConstant(boundsOp.getUpperbound()) - |
| *fir::getIntIfConstant(boundsOp.getLowerbound()) + 1; |
| shape.push_back(ext); |
| } else if (boundsOp.getExtent() && |
| fir::getIntIfConstant(boundsOp.getExtent())) { |
| shape.push_back(*fir::getIntIfConstant(boundsOp.getExtent())); |
| } else { |
| return ty; // TODO: handle dynamic shaped array slice. |
| } |
| } |
| if (shape.empty() || shape.size() != bounds.size()) |
| return ty; |
| auto newSeqTy = fir::SequenceType::get(shape, seqTy.getEleTy()); |
| if (mlir::isa<fir::ReferenceType, fir::PointerType>(ty)) |
| return fir::ReferenceType::get(newSeqTy); |
| return newSeqTy; |
| } |
| return ty; |
| } |
| |
| static mlir::SymbolRefAttr |
| createOrGetRecipe(fir::FirOpBuilder &builder, mlir::Location loc, |
| mlir::acc::RecipeKind kind, mlir::Value addr, |
| llvm::SmallVector<mlir::Value> &bounds) { |
| mlir::Type ty = getTypeFromBounds(bounds, addr.getType()); |
| // Compute the canonical recipe name for the given kind, type, address and |
| // bounds so that recipes are shared wherever possible. |
| std::string recipeName = fir::acc::getRecipeName(kind, ty, addr, bounds); |
| |
| switch (kind) { |
| case mlir::acc::RecipeKind::private_recipe: { |
| auto recipe = |
| Fortran::lower::createOrGetPrivateRecipe(builder, recipeName, loc, ty); |
| return mlir::SymbolRefAttr::get(builder.getContext(), recipe.getSymName()); |
| } |
| case mlir::acc::RecipeKind::firstprivate_recipe: { |
| auto recipe = Fortran::lower::createOrGetFirstprivateRecipe( |
| builder, recipeName, loc, ty, bounds); |
| return mlir::SymbolRefAttr::get(builder.getContext(), recipe.getSymName()); |
| } |
| default: |
| llvm::report_fatal_error( |
| "createOrGetRecipe only supports private and firstprivate recipes"); |
| } |
| } |
| |
| namespace { |
| // Helper class to keep track of designators that appear in data clauses of |
| // structured constructs so that they can be remapped to the data operation |
| // result inside the scope of the constructs. |
| class AccDataMap { |
| public: |
| struct DataComponent { |
| // Semantic representation of the component reference that appeared |
| // inside the data clause and that will need to be remapped to the |
| // data operation result. |
| Fortran::evaluate::Component component; |
| // Operation that produced the component when lowering the data clause. |
| mlir::Value designate; |
| // data operation result. |
| mlir::Value accValue; |
| }; |
| void emplaceSymbol(mlir::Value accValue, Fortran::semantics::SymbolRef sym) { |
| symbols.emplace_back(mlir::Value(accValue), |
| Fortran::semantics::SymbolRef(*sym)); |
| } |
| void emplaceComponent(mlir::Value accValue, |
| Fortran::evaluate::Component &&comp, |
| mlir::Value designate) { |
| components.emplace_back( |
| DataComponent{std::move(comp), designate, accValue}); |
| } |
| bool empty() const { return symbols.empty() && components.empty(); } |
| |
| /// Remap symbols and components that appeared in OpenACC data clauses to use |
| /// the results of the corresponding data operations. This allows isolating |
| /// symbol accesses inside the OpenACC region from accesses in the host and |
| /// other regions while preserving Fortran information about the symbols for |
| /// optimizations. |
| void remapDataOperandSymbols(Fortran::lower::AbstractConverter &converter, |
| fir::FirOpBuilder &builder, |
| mlir::Region ®ion) const; |
| |
| llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>> |
| symbols; |
| llvm::SmallVector<DataComponent> components; |
| }; |
| } // namespace |
| |
| template <typename Op> |
| static void |
| genDataOperandOperations(const Fortran::parser::AccObjectList &objectList, |
| Fortran::lower::AbstractConverter &converter, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::StatementContext &stmtCtx, |
| llvm::SmallVectorImpl<mlir::Value> &dataOperands, |
| mlir::acc::DataClause dataClause, bool structured, |
| bool implicit, llvm::ArrayRef<mlir::Value> async, |
| llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes, |
| llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes, |
| bool setDeclareAttr = false, |
| AccDataMap *dataMap = nullptr) { |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext}; |
| const bool unwrapBoxAddr = true; |
| for (const auto &accObject : objectList.v) { |
| llvm::SmallVector<mlir::Value> bounds; |
| std::stringstream asFortran; |
| mlir::Location operandLocation = genOperandLocation(converter, accObject); |
| |
| Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject); |
| |
| std::optional<Fortran::evaluate::Component> componentRef; |
| Fortran::semantics::MaybeExpr designator = Fortran::common::visit( |
| [&](auto &&s) { return ea.Analyze(s); }, accObject.u); |
| if (std::optional<Fortran::evaluate::DataRef> dataRef = |
| Fortran::evaluate::ExtractDataRef(designator)) { |
| Fortran::common::visit( |
| Fortran::common::visitors{ |
| [&](const Fortran::evaluate::Component &component) { |
| componentRef = component; |
| }, |
| [&](const Fortran::evaluate::ArrayRef &arrayRef) { |
| if (auto *comp = arrayRef.base().UnwrapComponent()) |
| componentRef = *comp; |
| }, |
| [](const auto &) {}}, |
| dataRef->u); |
| } |
| |
| fir::factory::AddrAndBoundsInfo info = |
| Fortran::lower::gatherDataOperandAddrAndBounds< |
| mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>( |
| converter, builder, semanticsContext, stmtCtx, symbol, designator, |
| operandLocation, asFortran, bounds, |
| /*treatIndexAsSection=*/true, /*unwrapFirBox=*/false, |
| /*genDefaultBounds=*/generateDefaultBounds, |
| /*strideIncludeLowerExtent=*/strideIncludeLowerExtent, |
| /*loadAllocatableAndPointerComponent=*/false); |
| LLVM_DEBUG(llvm::dbgs() << __func__ << "\n"; info.dump(llvm::dbgs())); |
| |
| // If the input value is optional and is not a descriptor, we use the |
| // rawInput directly. |
| // For privatization, absent OPTIONAL are illegal as per OpenACC 3.3 |
| // section 2.17.1 and the descriptor must be used to drive the creation of |
| // the temporary and the copy. |
| bool isPrivate = std::is_same_v<Op, mlir::acc::PrivateOp> || |
| std::is_same_v<Op, mlir::acc::FirstprivateOp>; |
| mlir::Value baseAddr = (!isPrivate && |
| (fir::unwrapRefType(info.addr.getType()) != |
| fir::unwrapRefType(info.rawInput.getType())) && |
| info.isPresent) |
| ? info.rawInput |
| : info.addr; |
| |
| // TODO: update privatization of array section to return the base |
| // address and update the recipe generation to "offset back" the returned |
| // address. Then it will be possible to remap them like in other cases. |
| bool isPrivateArraySection = isPrivate && !bounds.empty(); |
| mlir::Type resTy = isPrivateArraySection |
| ? getTypeFromBounds(bounds, baseAddr.getType()) |
| : baseAddr.getType(); |
| |
| Op op = createDataEntryOp<Op>( |
| builder, operandLocation, baseAddr, asFortran, bounds, structured, |
| implicit, dataClause, resTy, async, asyncDeviceTypes, |
| asyncOnlyDeviceTypes, unwrapBoxAddr, info.isPresent); |
| dataOperands.push_back(op.getAccVar()); |
| |
| // Optionally tag the underlying variable with a declare attribute. |
| if (setDeclareAttr) |
| if (auto *defOp = op.getVar().getDefiningOp()) |
| addDeclareAttr(builder, defOp, dataClause); |
| |
| // TODO: no_create remapping could currently cause segfaults because of the |
| // fir.box_addr that may be inserted in the remapping in the region. |
| // This is an issue if the variable is not mapped (which is OK if its |
| // accesses are not reached inside the construct). |
| bool isNoCreateWithBounds = |
| std::is_same_v<Op, mlir::acc::NoCreateOp> && !bounds.empty(); |
| |
| // Track the symbol and its corresponding mlir::Value if requested so that |
| // accesses inside regions can be remapped. |
| if (dataMap && !isPrivateArraySection && !isNoCreateWithBounds) { |
| if (componentRef) |
| dataMap->emplaceComponent(op.getAccVar(), std::move(*componentRef), |
| baseAddr); |
| else |
| dataMap->emplaceSymbol(op.getAccVar(), |
| Fortran::semantics::SymbolRef(symbol)); |
| } |
| |
| // For private/firstprivate, attach (and optionally record) the recipe. |
| if constexpr (std::is_same_v<Op, mlir::acc::PrivateOp>) { |
| mlir::SymbolRefAttr recipeAttr = createOrGetRecipe( |
| builder, operandLocation, mlir::acc::RecipeKind::private_recipe, |
| info.addr, bounds); |
| op.setRecipeAttr(recipeAttr); |
| } else if constexpr (std::is_same_v<Op, mlir::acc::FirstprivateOp>) { |
| mlir::SymbolRefAttr recipeAttr = createOrGetRecipe( |
| builder, operandLocation, mlir::acc::RecipeKind::firstprivate_recipe, |
| info.addr, bounds); |
| op.setRecipeAttr(recipeAttr); |
| } |
| } |
| } |
| |
| template <typename GlobalCtorOrDtorOp, typename EntryOp, typename DeclareOp, |
| typename ExitOp> |
| static void createDeclareGlobalOp(mlir::OpBuilder &modBuilder, |
| fir::FirOpBuilder &builder, |
| mlir::Location loc, fir::GlobalOp globalOp, |
| mlir::acc::DataClause clause, |
| const std::string &declareGlobalName, |
| bool implicit, std::stringstream &asFortran) { |
| GlobalCtorOrDtorOp declareGlobalOp = |
| GlobalCtorOrDtorOp::create(modBuilder, loc, declareGlobalName); |
| builder.createBlock(&declareGlobalOp.getRegion(), |
| declareGlobalOp.getRegion().end(), {}, {}); |
| builder.setInsertionPointToEnd(&declareGlobalOp.getRegion().back()); |
| |
| fir::AddrOfOp addrOp = fir::AddrOfOp::create( |
| builder, loc, fir::ReferenceType::get(globalOp.getType()), |
| globalOp.getSymbol()); |
| addDeclareAttr(builder, addrOp, clause); |
| |
| llvm::SmallVector<mlir::Value> bounds; |
| EntryOp entryOp = createDataEntryOp<EntryOp>( |
| builder, loc, addrOp.getResTy(), asFortran, bounds, |
| /*structured=*/false, implicit, clause, addrOp.getResTy().getType(), |
| /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{}); |
| if constexpr (std::is_same_v<DeclareOp, mlir::acc::DeclareEnterOp>) |
| DeclareOp::create(builder, loc, |
| mlir::acc::DeclareTokenType::get(entryOp.getContext()), |
| mlir::ValueRange(entryOp.getAccVar())); |
| else |
| DeclareOp::create(builder, loc, mlir::Value{}, |
| mlir::ValueRange(entryOp.getAccVar())); |
| if constexpr (std::is_same_v<GlobalCtorOrDtorOp, |
| mlir::acc::GlobalDestructorOp>) { |
| if constexpr (std::is_same_v<ExitOp, mlir::acc::DeclareLinkOp>) { |
| // No destructor emission for declare link in this path to avoid |
| // complex var/varType/varPtrPtr signatures. The ctor registers the link. |
| } else if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> || |
| std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>) { |
| ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(), |
| entryOp.getVar(), entryOp.getVarType(), |
| entryOp.getBounds(), entryOp.getAsyncOperands(), |
| entryOp.getAsyncOperandsDeviceTypeAttr(), |
| entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(), |
| /*structured=*/false, /*implicit=*/false, |
| builder.getStringAttr(*entryOp.getName())); |
| } else { |
| ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(), |
| entryOp.getBounds(), entryOp.getAsyncOperands(), |
| entryOp.getAsyncOperandsDeviceTypeAttr(), |
| entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(), |
| /*structured=*/false, /*implicit=*/false, |
| builder.getStringAttr(*entryOp.getName())); |
| } |
| } |
| mlir::acc::TerminatorOp::create(builder, loc); |
| modBuilder.setInsertionPointAfter(declareGlobalOp); |
| } |
| |
| template <typename EntryOp, typename ExitOp> |
| static void |
| emitCtorDtorPair(mlir::OpBuilder &modBuilder, fir::FirOpBuilder &builder, |
| mlir::Location operandLocation, fir::GlobalOp globalOp, |
| mlir::acc::DataClause clause, std::stringstream &asFortran, |
| const std::string &ctorName) { |
| createDeclareGlobalOp<mlir::acc::GlobalConstructorOp, EntryOp, |
| mlir::acc::DeclareEnterOp, ExitOp>( |
| modBuilder, builder, operandLocation, globalOp, clause, ctorName, |
| /*implicit=*/false, asFortran); |
| |
| std::stringstream dtorName; |
| dtorName << globalOp.getSymName().str() << "_acc_dtor"; |
| createDeclareGlobalOp<mlir::acc::GlobalDestructorOp, |
| mlir::acc::GetDevicePtrOp, mlir::acc::DeclareExitOp, |
| ExitOp>(modBuilder, builder, operandLocation, globalOp, |
| clause, dtorName.str(), |
| /*implicit=*/false, asFortran); |
| } |
| |
| template <typename EntryOp, typename ExitOp> |
| static void genDeclareDataOperandOperations( |
| const Fortran::parser::AccObjectList &objectList, |
| Fortran::lower::AbstractConverter &converter, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::StatementContext &stmtCtx, |
| llvm::SmallVectorImpl<mlir::Value> &dataOperands, |
| mlir::acc::DataClause dataClause, bool structured, bool implicit) { |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext}; |
| for (const auto &accObject : objectList.v) { |
| llvm::SmallVector<mlir::Value> bounds; |
| std::stringstream asFortran; |
| mlir::Location operandLocation = genOperandLocation(converter, accObject); |
| Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject); |
| // Handle COMMON/global symbols via module-level ctor/dtor path. |
| if (symbol.detailsIf<Fortran::semantics::CommonBlockDetails>() || |
| Fortran::semantics::FindCommonBlockContaining(symbol)) { |
| emitCommonGlobal( |
| converter, builder, accObject, dataClause, |
| [&](mlir::OpBuilder &modBuilder, [[maybe_unused]] mlir::Location loc, |
| [[maybe_unused]] fir::GlobalOp globalOp, |
| [[maybe_unused]] mlir::acc::DataClause clause, |
| std::stringstream &asFortranStr, const std::string &ctorName) { |
| if constexpr (std::is_same_v<EntryOp, mlir::acc::DeclareLinkOp>) { |
| createDeclareGlobalOp< |
| mlir::acc::GlobalConstructorOp, mlir::acc::DeclareLinkOp, |
| mlir::acc::DeclareEnterOp, mlir::acc::DeclareLinkOp>( |
| modBuilder, builder, loc, globalOp, clause, ctorName, |
| /*implicit=*/false, asFortranStr); |
| } else if constexpr (std::is_same_v<EntryOp, mlir::acc::CreateOp> || |
| std::is_same_v<EntryOp, mlir::acc::CopyinOp> || |
| std::is_same_v< |
| EntryOp, |
| mlir::acc::DeclareDeviceResidentOp> || |
| std::is_same_v<ExitOp, mlir::acc::CopyoutOp>) { |
| emitCtorDtorPair<EntryOp, ExitOp>(modBuilder, builder, loc, |
| globalOp, clause, asFortranStr, |
| ctorName); |
| } else { |
| // No module-level ctor/dtor for this clause (e.g., deviceptr, |
| // present). Handled via structured declare region only. |
| return; |
| } |
| }); |
| continue; |
| } |
| Fortran::semantics::MaybeExpr designator = Fortran::common::visit( |
| [&](auto &&s) { return ea.Analyze(s); }, accObject.u); |
| |
| if (designator) { |
| Fortran::semantics::SomeExpr someExpr = *designator; |
| if (Fortran::lower::detail::getRef<Fortran::evaluate::Component>( |
| someExpr)) { |
| TODO(operandLocation, |
| "OpenACC declare with component reference not yet supported"); |
| } |
| } |
| fir::factory::AddrAndBoundsInfo info = |
| Fortran::lower::gatherDataOperandAddrAndBounds< |
| mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>( |
| converter, builder, semanticsContext, stmtCtx, symbol, designator, |
| operandLocation, asFortran, bounds, |
| /*treatIndexAsSection=*/true, /*unwrapFirBox=*/false, |
| /*genDefaultBounds=*/generateDefaultBounds, |
| /*strideIncludeLowerExtent=*/strideIncludeLowerExtent, |
| /*loadAllocatableAndPointerComponent=*/false); |
| LLVM_DEBUG(llvm::dbgs() << __func__ << "\n"; info.dump(llvm::dbgs())); |
| EntryOp op = createDataEntryOp<EntryOp>( |
| builder, operandLocation, info.addr, asFortran, bounds, structured, |
| implicit, dataClause, info.addr.getType(), |
| /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{}); |
| dataOperands.push_back(op.getAccVar()); |
| addDeclareAttr(builder, op.getVar().getDefiningOp(), dataClause); |
| if (mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(info.addr.getType()))) { |
| mlir::OpBuilder modBuilder(builder.getModule().getBodyRegion()); |
| modBuilder.setInsertionPointAfter(builder.getFunction()); |
| std::string prefix = converter.mangleName(symbol); |
| createDeclareAllocFuncWithArg<EntryOp>( |
| modBuilder, builder, operandLocation, info.addr.getType(), prefix, |
| asFortran, dataClause); |
| if constexpr (!std::is_same_v<EntryOp, ExitOp>) |
| createDeclareDeallocFuncWithArg<ExitOp>( |
| modBuilder, builder, operandLocation, info.addr.getType(), prefix, |
| asFortran, dataClause); |
| } |
| } |
| } |
| |
| template <typename EntryOp, typename ExitOp, typename Clause> |
| static void genDeclareDataOperandOperationsWithModifier( |
| const Clause *x, Fortran::lower::AbstractConverter &converter, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::StatementContext &stmtCtx, |
| Fortran::parser::AccDataModifier::Modifier mod, |
| llvm::SmallVectorImpl<mlir::Value> &dataClauseOperands, |
| const mlir::acc::DataClause clause, |
| const mlir::acc::DataClause clauseWithModifier) { |
| const Fortran::parser::AccObjectListWithModifier &listWithModifier = x->v; |
| const auto &accObjectList = |
| std::get<Fortran::parser::AccObjectList>(listWithModifier.t); |
| const auto &modifier = |
| std::get<std::optional<Fortran::parser::AccDataModifier>>( |
| listWithModifier.t); |
| mlir::acc::DataClause dataClause = |
| (modifier && (*modifier).v == mod) ? clauseWithModifier : clause; |
| genDeclareDataOperandOperations<EntryOp, ExitOp>( |
| accObjectList, converter, semanticsContext, stmtCtx, dataClauseOperands, |
| dataClause, |
| /*structured=*/true, /*implicit=*/false); |
| } |
| |
| template <typename EntryOp, typename ExitOp> |
| static void |
| genDataExitOperations(fir::FirOpBuilder &builder, |
| llvm::SmallVector<mlir::Value> operands, bool structured, |
| std::optional<mlir::Location> exitLoc = std::nullopt) { |
| for (mlir::Value operand : operands) { |
| auto entryOp = mlir::dyn_cast_or_null<EntryOp>(operand.getDefiningOp()); |
| assert(entryOp && "data entry op expected"); |
| mlir::Location opLoc = exitLoc ? *exitLoc : entryOp.getLoc(); |
| if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> || |
| std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>) |
| ExitOp::create( |
| builder, opLoc, entryOp.getAccVar(), entryOp.getVar(), |
| entryOp.getVarType(), entryOp.getBounds(), entryOp.getAsyncOperands(), |
| entryOp.getAsyncOperandsDeviceTypeAttr(), entryOp.getAsyncOnlyAttr(), |
| entryOp.getDataClause(), structured, entryOp.getImplicit(), |
| builder.getStringAttr(*entryOp.getName())); |
| else |
| ExitOp::create( |
| builder, opLoc, entryOp.getAccVar(), entryOp.getBounds(), |
| entryOp.getAsyncOperands(), entryOp.getAsyncOperandsDeviceTypeAttr(), |
| entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(), structured, |
| entryOp.getImplicit(), builder.getStringAttr(*entryOp.getName())); |
| } |
| } |
| |
| fir::ShapeOp genShapeOp(mlir::OpBuilder &builder, fir::SequenceType seqTy, |
| mlir::Location loc) { |
| llvm::SmallVector<mlir::Value> extents; |
| mlir::Type idxTy = builder.getIndexType(); |
| for (auto extent : seqTy.getShape()) |
| extents.push_back(mlir::arith::ConstantOp::create( |
| builder, loc, idxTy, builder.getIntegerAttr(idxTy, extent))); |
| return fir::ShapeOp::create(builder, loc, extents); |
| } |
| |
| /// Get the initial value for reduction operator. |
| template <typename R> |
| static R getReductionInitValue(mlir::acc::ReductionOperator op, mlir::Type ty) { |
| if (op == mlir::acc::ReductionOperator::AccMin) { |
| // min init value -> largest |
| if constexpr (std::is_same_v<R, llvm::APInt>) { |
| assert(ty.isIntOrIndex() && "expect integer or index type"); |
| return llvm::APInt::getSignedMaxValue(ty.getIntOrFloatBitWidth()); |
| } |
| if constexpr (std::is_same_v<R, llvm::APFloat>) { |
| auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty); |
| assert(floatTy && "expect float type"); |
| return llvm::APFloat::getLargest(floatTy.getFloatSemantics(), |
| /*negative=*/false); |
| } |
| } else if (op == mlir::acc::ReductionOperator::AccMax) { |
| // max init value -> smallest |
| if constexpr (std::is_same_v<R, llvm::APInt>) { |
| assert(ty.isIntOrIndex() && "expect integer or index type"); |
| return llvm::APInt::getSignedMinValue(ty.getIntOrFloatBitWidth()); |
| } |
| if constexpr (std::is_same_v<R, llvm::APFloat>) { |
| auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty); |
| assert(floatTy && "expect float type"); |
| return llvm::APFloat::getSmallest(floatTy.getFloatSemantics(), |
| /*negative=*/true); |
| } |
| } else if (op == mlir::acc::ReductionOperator::AccIand) { |
| if constexpr (std::is_same_v<R, llvm::APInt>) { |
| assert(ty.isIntOrIndex() && "expect integer type"); |
| unsigned bits = ty.getIntOrFloatBitWidth(); |
| return llvm::APInt::getAllOnes(bits); |
| } |
| } else { |
| assert(op != mlir::acc::ReductionOperator::AccNone); |
| // +, ior, ieor init value -> 0 |
| // * init value -> 1 |
| int64_t value = (op == mlir::acc::ReductionOperator::AccMul) ? 1 : 0; |
| if constexpr (std::is_same_v<R, llvm::APInt>) { |
| assert(ty.isIntOrIndex() && "expect integer or index type"); |
| return llvm::APInt(ty.getIntOrFloatBitWidth(), value, true); |
| } |
| |
| if constexpr (std::is_same_v<R, llvm::APFloat>) { |
| assert(mlir::isa<mlir::FloatType>(ty) && "expect float type"); |
| auto floatTy = mlir::dyn_cast<mlir::FloatType>(ty); |
| return llvm::APFloat(floatTy.getFloatSemantics(), value); |
| } |
| |
| if constexpr (std::is_same_v<R, int64_t>) |
| return value; |
| } |
| llvm_unreachable("OpenACC reduction unsupported type"); |
| } |
| |
| /// Return a constant with the initial value for the reduction operator and |
| /// type combination. |
| static mlir::Value getReductionInitValue(fir::FirOpBuilder &builder, |
| mlir::Location loc, mlir::Type ty, |
| mlir::acc::ReductionOperator op) { |
| if (op == mlir::acc::ReductionOperator::AccLand || |
| op == mlir::acc::ReductionOperator::AccLor || |
| op == mlir::acc::ReductionOperator::AccEqv || |
| op == mlir::acc::ReductionOperator::AccNeqv) { |
| assert(mlir::isa<fir::LogicalType>(ty) && "expect fir.logical type"); |
| bool value = true; // .true. for .and. and .eqv. |
| if (op == mlir::acc::ReductionOperator::AccLor || |
| op == mlir::acc::ReductionOperator::AccNeqv) |
| value = false; // .false. for .or. and .neqv. |
| return builder.createBool(loc, value); |
| } |
| if (ty.isIntOrIndex()) |
| return mlir::arith::ConstantOp::create( |
| builder, loc, ty, |
| builder.getIntegerAttr(ty, getReductionInitValue<llvm::APInt>(op, ty))); |
| if (op == mlir::acc::ReductionOperator::AccMin || |
| op == mlir::acc::ReductionOperator::AccMax) { |
| if (mlir::isa<mlir::ComplexType>(ty)) |
| llvm::report_fatal_error( |
| "min/max reduction not supported for complex type"); |
| if (auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty)) |
| return mlir::arith::ConstantOp::create( |
| builder, loc, ty, |
| builder.getFloatAttr(ty, |
| getReductionInitValue<llvm::APFloat>(op, ty))); |
| } else if (auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty)) { |
| return mlir::arith::ConstantOp::create( |
| builder, loc, ty, |
| builder.getFloatAttr(ty, getReductionInitValue<int64_t>(op, ty))); |
| } else if (auto cmplxTy = mlir::dyn_cast_or_null<mlir::ComplexType>(ty)) { |
| mlir::Type floatTy = cmplxTy.getElementType(); |
| mlir::Value realInit = builder.createRealConstant( |
| loc, floatTy, getReductionInitValue<int64_t>(op, cmplxTy)); |
| mlir::Value imagInit = builder.createRealConstant(loc, floatTy, 0.0); |
| return fir::factory::Complex{builder, loc}.createComplex(cmplxTy, realInit, |
| imagInit); |
| } |
| |
| if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(ty)) |
| return getReductionInitValue(builder, loc, seqTy.getEleTy(), op); |
| |
| if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(ty)) |
| return getReductionInitValue(builder, loc, boxTy.getEleTy(), op); |
| |
| if (auto heapTy = mlir::dyn_cast<fir::HeapType>(ty)) |
| return getReductionInitValue(builder, loc, heapTy.getEleTy(), op); |
| |
| if (auto ptrTy = mlir::dyn_cast<fir::PointerType>(ty)) |
| return getReductionInitValue(builder, loc, ptrTy.getEleTy(), op); |
| |
| llvm::report_fatal_error("Unsupported OpenACC reduction type"); |
| } |
| |
| template <typename RecipeOp> |
| static RecipeOp genRecipeOp( |
| fir::FirOpBuilder &builder, mlir::ModuleOp mod, llvm::StringRef recipeName, |
| mlir::Location loc, mlir::Type ty, |
| mlir::acc::ReductionOperator op = mlir::acc::ReductionOperator::AccNone) { |
| mlir::OpBuilder modBuilder(mod.getBodyRegion()); |
| RecipeOp recipe; |
| if constexpr (std::is_same_v<RecipeOp, mlir::acc::ReductionRecipeOp>) { |
| recipe = mlir::acc::ReductionRecipeOp::create(modBuilder, loc, recipeName, |
| ty, op); |
| } else { |
| recipe = RecipeOp::create(modBuilder, loc, recipeName, ty); |
| } |
| |
| llvm::SmallVector<mlir::Type> argsTy{ty}; |
| llvm::SmallVector<mlir::Location> argsLoc{loc}; |
| if (auto refTy = mlir::dyn_cast_or_null<fir::ReferenceType>(ty)) { |
| if (auto seqTy = |
| mlir::dyn_cast_or_null<fir::SequenceType>(refTy.getEleTy())) { |
| if (seqTy.hasDynamicExtents()) { |
| mlir::Type idxTy = builder.getIndexType(); |
| for (unsigned i = 0; i < seqTy.getDimension(); ++i) { |
| argsTy.push_back(idxTy); |
| argsLoc.push_back(loc); |
| } |
| } |
| } |
| } |
| auto initBlock = builder.createBlock( |
| &recipe.getInitRegion(), recipe.getInitRegion().end(), argsTy, argsLoc); |
| builder.setInsertionPointToEnd(&recipe.getInitRegion().back()); |
| mlir::Value initValue; |
| if constexpr (std::is_same_v<RecipeOp, mlir::acc::ReductionRecipeOp>) { |
| assert(op != mlir::acc::ReductionOperator::AccNone); |
| initValue = getReductionInitValue(builder, loc, fir::unwrapRefType(ty), op); |
| } |
| |
| // Since we reuse the same recipe for all variables of the same type - we |
| // cannot use the actual variable name. Thus use a temporary name. |
| llvm::StringRef initName; |
| if constexpr (std::is_same_v<RecipeOp, mlir::acc::ReductionRecipeOp>) |
| initName = accReductionInitName; |
| else |
| initName = accPrivateInitName; |
| |
| auto mappableTy = mlir::dyn_cast<mlir::acc::MappableType>(ty); |
| assert(mappableTy && |
| "Expected that all variable types are considered mappable"); |
| bool needsDestroy = false; |
| auto retVal = mappableTy.generatePrivateInit( |
| builder, loc, |
| mlir::cast<mlir::TypedValue<mlir::acc::MappableType>>( |
| initBlock->getArgument(0)), |
| initName, |
| initBlock->getArguments().take_back(initBlock->getArguments().size() - 1), |
| initValue, needsDestroy); |
| mlir::acc::YieldOp::create(builder, loc, |
| retVal ? retVal : initBlock->getArgument(0)); |
| // Create destroy region and generate destruction if requested. |
| if (needsDestroy) { |
| llvm::SmallVector<mlir::Type> destroyArgsTy; |
| llvm::SmallVector<mlir::Location> destroyArgsLoc; |
| // original and privatized/reduction value |
| destroyArgsTy.push_back(ty); |
| destroyArgsTy.push_back(ty); |
| destroyArgsLoc.push_back(loc); |
| destroyArgsLoc.push_back(loc); |
| // Append bounds arguments (if any) in the same order as init region |
| if (argsTy.size() > 1) { |
| destroyArgsTy.append(argsTy.begin() + 1, argsTy.end()); |
| destroyArgsLoc.insert(destroyArgsLoc.end(), argsTy.size() - 1, loc); |
| } |
| |
| builder.createBlock(&recipe.getDestroyRegion(), |
| recipe.getDestroyRegion().end(), destroyArgsTy, |
| destroyArgsLoc); |
| builder.setInsertionPointToEnd(&recipe.getDestroyRegion().back()); |
| // Call interface on the privatized/reduction value (2nd argument). |
| (void)mappableTy.generatePrivateDestroy( |
| builder, loc, recipe.getDestroyRegion().front().getArgument(1)); |
| mlir::acc::TerminatorOp::create(builder, loc); |
| } |
| return recipe; |
| } |
| |
| mlir::acc::PrivateRecipeOp |
| Fortran::lower::createOrGetPrivateRecipe(fir::FirOpBuilder &builder, |
| llvm::StringRef recipeName, |
| mlir::Location loc, mlir::Type ty) { |
| mlir::ModuleOp mod = |
| builder.getBlock()->getParent()->getParentOfType<mlir::ModuleOp>(); |
| if (auto recipe = mod.lookupSymbol<mlir::acc::PrivateRecipeOp>(recipeName)) |
| return recipe; |
| |
| auto ip = builder.saveInsertionPoint(); |
| auto recipe = genRecipeOp<mlir::acc::PrivateRecipeOp>(builder, mod, |
| recipeName, loc, ty); |
| builder.restoreInsertionPoint(ip); |
| return recipe; |
| } |
| |
| /// Check if the DataBoundsOp is a constant bound (lb and ub are constants or |
| /// extent is a constant). |
| bool isConstantBound(mlir::acc::DataBoundsOp &op) { |
| if (op.getLowerbound() && fir::getIntIfConstant(op.getLowerbound()) && |
| op.getUpperbound() && fir::getIntIfConstant(op.getUpperbound())) |
| return true; |
| if (op.getExtent() && fir::getIntIfConstant(op.getExtent())) |
| return true; |
| return false; |
| } |
| |
| static llvm::SmallVector<mlir::Value> |
| genConstantBounds(fir::FirOpBuilder &builder, mlir::Location loc, |
| mlir::acc::DataBoundsOp &dataBound) { |
| mlir::Type idxTy = builder.getIndexType(); |
| mlir::Value lb, ub, step; |
| if (dataBound.getLowerbound() && |
| fir::getIntIfConstant(dataBound.getLowerbound()) && |
| dataBound.getUpperbound() && |
| fir::getIntIfConstant(dataBound.getUpperbound())) { |
| lb = builder.createIntegerConstant( |
| loc, idxTy, *fir::getIntIfConstant(dataBound.getLowerbound())); |
| ub = builder.createIntegerConstant( |
| loc, idxTy, *fir::getIntIfConstant(dataBound.getUpperbound())); |
| step = builder.createIntegerConstant(loc, idxTy, 1); |
| } else if (dataBound.getExtent()) { |
| lb = builder.createIntegerConstant(loc, idxTy, 0); |
| ub = builder.createIntegerConstant( |
| loc, idxTy, *fir::getIntIfConstant(dataBound.getExtent()) - 1); |
| step = builder.createIntegerConstant(loc, idxTy, 1); |
| } else { |
| llvm::report_fatal_error("Expect constant lb/ub or extent"); |
| } |
| return {lb, ub, step}; |
| } |
| |
| static hlfir::Entity genDesignateWithTriplets( |
| fir::FirOpBuilder &builder, mlir::Location loc, hlfir::Entity &entity, |
| hlfir::DesignateOp::Subscripts &triplets, mlir::Value shape) { |
| llvm::SmallVector<mlir::Value> lenParams; |
| hlfir::genLengthParameters(loc, builder, entity, lenParams); |
| auto designate = hlfir::DesignateOp::create( |
| builder, loc, entity.getBase().getType(), entity, /*component=*/"", |
| /*componentShape=*/mlir::Value{}, triplets, |
| /*substring=*/mlir::ValueRange{}, /*complexPartAttr=*/std::nullopt, shape, |
| lenParams); |
| return hlfir::Entity{designate.getResult()}; |
| } |
| |
| // Designate uses triplets based on object lower bounds while acc.bounds are |
| // zero based. This helper shift the bounds to create the designate triplets. |
| static hlfir::DesignateOp::Subscripts |
| genTripletsFromAccBounds(fir::FirOpBuilder &builder, mlir::Location loc, |
| const llvm::SmallVector<mlir::Value> &accBounds, |
| hlfir::Entity entity) { |
| assert(entity.getRank() * 3 == static_cast<int>(accBounds.size()) && |
| "must get lb,ub,step for each dimension"); |
| hlfir::DesignateOp::Subscripts triplets; |
| for (unsigned i = 0; i < accBounds.size(); i += 3) { |
| mlir::Value lb = hlfir::genLBound(loc, builder, entity, i / 3); |
| lb = builder.createConvert(loc, accBounds[i].getType(), lb); |
| assert(accBounds[i].getType() == accBounds[i + 1].getType() && |
| "mix of integer types in triplets"); |
| mlir::Value sliceLB = |
| builder.createOrFold<mlir::arith::AddIOp>(loc, accBounds[i], lb); |
| mlir::Value sliceUB = |
| builder.createOrFold<mlir::arith::AddIOp>(loc, accBounds[i + 1], lb); |
| triplets.emplace_back( |
| hlfir::DesignateOp::Triplet{sliceLB, sliceUB, accBounds[i + 2]}); |
| } |
| return triplets; |
| } |
| |
| static std::pair<hlfir::Entity, hlfir::Entity> |
| genArraySectionsInRecipe(fir::FirOpBuilder &builder, mlir::Location loc, |
| llvm::SmallVector<mlir::Value> &dataOperationBounds, |
| mlir::ValueRange recipeArguments, |
| bool allConstantBound, hlfir::Entity lhs, |
| hlfir::Entity rhs) { |
| lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs); |
| rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs); |
| // Get the list of lb,ub,step values for the sections that can be used inside |
| // the recipe region. |
| llvm::SmallVector<mlir::Value> bounds; |
| if (allConstantBound) { |
| // For constant bounds, the bounds are not region arguments. Materialize |
| // constants looking at the IR for the bounds on the data operation. |
| for (auto bound : dataOperationBounds) { |
| auto dataBound = |
| mlir::cast<mlir::acc::DataBoundsOp>(bound.getDefiningOp()); |
| bounds.append(genConstantBounds(builder, loc, dataBound)); |
| } |
| } else { |
| // If one bound is not constant, all of the bounds are region arguments. |
| for (auto arg : recipeArguments.drop_front(2)) |
| bounds.push_back(arg); |
| } |
| // Compute the fir.shape of the array section and the triplets to create |
| // hlfir.designate. |
| assert(lhs.getRank() * 3 == static_cast<int>(bounds.size()) && |
| "must get lb,ub,step for each dimension"); |
| llvm::SmallVector<mlir::Value> extents; |
| mlir::Type idxTy = builder.getIndexType(); |
| for (unsigned i = 0; i < bounds.size(); i += 3) |
| extents.push_back(builder.genExtentFromTriplet( |
| loc, bounds[i], bounds[i + 1], bounds[i + 2], idxTy)); |
| mlir::Value shape = fir::ShapeOp::create(builder, loc, extents); |
| hlfir::DesignateOp::Subscripts rhsTriplets = |
| genTripletsFromAccBounds(builder, loc, bounds, rhs); |
| hlfir::DesignateOp::Subscripts lhsTriplets; |
| // Share the bounds when both rhs/lhs are known to be 1-based to avoid noise |
| // in the IR for the most common cases. |
| if (!lhs.mayHaveNonDefaultLowerBounds() && |
| !rhs.mayHaveNonDefaultLowerBounds()) |
| lhsTriplets = rhsTriplets; |
| else |
| lhsTriplets = genTripletsFromAccBounds(builder, loc, bounds, lhs); |
| hlfir::Entity leftSection = |
| genDesignateWithTriplets(builder, loc, lhs, lhsTriplets, shape); |
| hlfir::Entity rightSection = |
| genDesignateWithTriplets(builder, loc, rhs, rhsTriplets, shape); |
| return {leftSection, rightSection}; |
| } |
| |
| // Generate the combiner or copy region block and block arguments and return the |
| // source and destination entities. |
| static std::pair<hlfir::Entity, hlfir::Entity> |
| genRecipeCombinerOrCopyRegion(fir::FirOpBuilder &builder, mlir::Location loc, |
| mlir::Type ty, mlir::Region ®ion, |
| llvm::SmallVector<mlir::Value> &bounds, |
| bool allConstantBound) { |
| llvm::SmallVector<mlir::Type> argsTy{ty, ty}; |
| llvm::SmallVector<mlir::Location> argsLoc{loc, loc}; |
| if (!allConstantBound) { |
| for (mlir::Value bound : llvm::reverse(bounds)) { |
| auto dataBound = |
| mlir::dyn_cast<mlir::acc::DataBoundsOp>(bound.getDefiningOp()); |
| argsTy.push_back(dataBound.getLowerbound().getType()); |
| argsLoc.push_back(dataBound.getLowerbound().getLoc()); |
| argsTy.push_back(dataBound.getUpperbound().getType()); |
| argsLoc.push_back(dataBound.getUpperbound().getLoc()); |
| argsTy.push_back(dataBound.getStartIdx().getType()); |
| argsLoc.push_back(dataBound.getStartIdx().getLoc()); |
| } |
| } |
| mlir::Block *block = |
| builder.createBlock(®ion, region.end(), argsTy, argsLoc); |
| builder.setInsertionPointToEnd(®ion.back()); |
| return {hlfir::Entity{block->getArgument(0)}, |
| hlfir::Entity{block->getArgument(1)}}; |
| } |
| |
| mlir::acc::FirstprivateRecipeOp Fortran::lower::createOrGetFirstprivateRecipe( |
| fir::FirOpBuilder &builder, llvm::StringRef recipeName, mlir::Location loc, |
| mlir::Type ty, llvm::SmallVector<mlir::Value> &bounds) { |
| mlir::ModuleOp mod = |
| builder.getBlock()->getParent()->getParentOfType<mlir::ModuleOp>(); |
| if (auto recipe = |
| mod.lookupSymbol<mlir::acc::FirstprivateRecipeOp>(recipeName)) |
| return recipe; |
| |
| mlir::OpBuilder::InsertionGuard guard(builder); |
| auto recipe = genRecipeOp<mlir::acc::FirstprivateRecipeOp>( |
| builder, mod, recipeName, loc, ty); |
| bool allConstantBound = fir::acc::areAllBoundsConstant(bounds); |
| auto [source, destination] = genRecipeCombinerOrCopyRegion( |
| builder, loc, ty, recipe.getCopyRegion(), bounds, allConstantBound); |
| |
| fir::FirOpBuilder firBuilder{builder, recipe.getOperation()}; |
| |
| source = hlfir::derefPointersAndAllocatables(loc, builder, source); |
| destination = hlfir::derefPointersAndAllocatables(loc, builder, destination); |
| |
| if (!bounds.empty()) |
| std::tie(source, destination) = genArraySectionsInRecipe( |
| firBuilder, loc, bounds, recipe.getCopyRegion().getArguments(), |
| allConstantBound, source, destination); |
| // The source and the destination of the firstprivate copy cannot alias, |
| // the destination is already properly allocated, so a simple assignment |
| // can be generated right away to avoid ending-up with runtime calls |
| // for arrays of numerical, logical and, character types. |
| // |
| // The temporary_lhs flag allows indicating that user defined assignments |
| // should not be called while copying components, and that the LHS and RHS |
| // are known to not alias since the LHS is a created object. |
| // |
| // TODO: detect cases where user defined assignment is needed and add a TODO. |
| // using temporary_lhs allows more aggressive optimizations of simple derived |
| // types. Existing compilers supporting OpenACC do not call user defined |
| // assignments, some use case is needed to decide what to do. |
| source = hlfir::loadTrivialScalar(loc, builder, source); |
| hlfir::AssignOp::create(builder, loc, source, destination, /*realloc=*/false, |
| /*keep_lhs_length_if_realloc=*/false, |
| /*temporary_lhs=*/true); |
| mlir::acc::TerminatorOp::create(builder, loc); |
| return recipe; |
| } |
| |
| /// Return the corresponding enum value for the mlir::acc::ReductionOperator |
| /// from the parser representation. |
| static mlir::acc::ReductionOperator |
| getReductionOperator(const Fortran::parser::ReductionOperator &op) { |
| switch (op.v) { |
| case Fortran::parser::ReductionOperator::Operator::Plus: |
| return mlir::acc::ReductionOperator::AccAdd; |
| case Fortran::parser::ReductionOperator::Operator::Multiply: |
| return mlir::acc::ReductionOperator::AccMul; |
| case Fortran::parser::ReductionOperator::Operator::Max: |
| return mlir::acc::ReductionOperator::AccMax; |
| case Fortran::parser::ReductionOperator::Operator::Min: |
| return mlir::acc::ReductionOperator::AccMin; |
| case Fortran::parser::ReductionOperator::Operator::Iand: |
| return mlir::acc::ReductionOperator::AccIand; |
| case Fortran::parser::ReductionOperator::Operator::Ior: |
| return mlir::acc::ReductionOperator::AccIor; |
| case Fortran::parser::ReductionOperator::Operator::Ieor: |
| return mlir::acc::ReductionOperator::AccXor; |
| case Fortran::parser::ReductionOperator::Operator::And: |
| return mlir::acc::ReductionOperator::AccLand; |
| case Fortran::parser::ReductionOperator::Operator::Or: |
| return mlir::acc::ReductionOperator::AccLor; |
| case Fortran::parser::ReductionOperator::Operator::Eqv: |
| return mlir::acc::ReductionOperator::AccEqv; |
| case Fortran::parser::ReductionOperator::Operator::Neqv: |
| return mlir::acc::ReductionOperator::AccNeqv; |
| } |
| llvm_unreachable("unexpected reduction operator"); |
| } |
| |
| template <typename Op> |
| static mlir::Value genLogicalCombiner(fir::FirOpBuilder &builder, |
| mlir::Location loc, mlir::Value value1, |
| mlir::Value value2) { |
| mlir::Type i1 = builder.getI1Type(); |
| mlir::Value v1 = fir::ConvertOp::create(builder, loc, i1, value1); |
| mlir::Value v2 = fir::ConvertOp::create(builder, loc, i1, value2); |
| mlir::Value combined = Op::create(builder, loc, v1, v2); |
| return fir::ConvertOp::create(builder, loc, value1.getType(), combined); |
| } |
| |
| static mlir::Value genComparisonCombiner(fir::FirOpBuilder &builder, |
| mlir::Location loc, |
| mlir::arith::CmpIPredicate pred, |
| mlir::Value value1, |
| mlir::Value value2) { |
| mlir::Type i1 = builder.getI1Type(); |
| mlir::Value v1 = fir::ConvertOp::create(builder, loc, i1, value1); |
| mlir::Value v2 = fir::ConvertOp::create(builder, loc, i1, value2); |
| mlir::Value add = mlir::arith::CmpIOp::create(builder, loc, pred, v1, v2); |
| return fir::ConvertOp::create(builder, loc, value1.getType(), add); |
| } |
| |
| static mlir::Value genScalarCombiner(fir::FirOpBuilder &builder, |
| mlir::Location loc, |
| mlir::acc::ReductionOperator op, |
| mlir::Type ty, mlir::Value value1, |
| mlir::Value value2) { |
| value1 = builder.loadIfRef(loc, value1); |
| value2 = builder.loadIfRef(loc, value2); |
| if (op == mlir::acc::ReductionOperator::AccAdd) { |
| if (ty.isIntOrIndex()) |
| return mlir::arith::AddIOp::create(builder, loc, value1, value2); |
| if (mlir::isa<mlir::FloatType>(ty)) |
| return mlir::arith::AddFOp::create(builder, loc, value1, value2); |
| if (auto cmplxTy = mlir::dyn_cast_or_null<mlir::ComplexType>(ty)) |
| return fir::AddcOp::create(builder, loc, value1, value2); |
| TODO(loc, "reduction add type"); |
| } |
| |
| if (op == mlir::acc::ReductionOperator::AccMul) { |
| if (ty.isIntOrIndex()) |
| return mlir::arith::MulIOp::create(builder, loc, value1, value2); |
| if (mlir::isa<mlir::FloatType>(ty)) |
| return mlir::arith::MulFOp::create(builder, loc, value1, value2); |
| if (mlir::isa<mlir::ComplexType>(ty)) |
| return fir::MulcOp::create(builder, loc, value1, value2); |
| TODO(loc, "reduction mul type"); |
| } |
| |
| if (op == mlir::acc::ReductionOperator::AccMin) |
| return fir::genMin(builder, loc, {value1, value2}); |
| |
| if (op == mlir::acc::ReductionOperator::AccMax) |
| return fir::genMax(builder, loc, {value1, value2}); |
| |
| if (op == mlir::acc::ReductionOperator::AccIand) |
| return mlir::arith::AndIOp::create(builder, loc, value1, value2); |
| |
| if (op == mlir::acc::ReductionOperator::AccIor) |
| return mlir::arith::OrIOp::create(builder, loc, value1, value2); |
| |
| if (op == mlir::acc::ReductionOperator::AccXor) |
| return mlir::arith::XOrIOp::create(builder, loc, value1, value2); |
| |
| if (op == mlir::acc::ReductionOperator::AccLand) |
| return genLogicalCombiner<mlir::arith::AndIOp>(builder, loc, value1, |
| value2); |
| |
| if (op == mlir::acc::ReductionOperator::AccLor) |
| return genLogicalCombiner<mlir::arith::OrIOp>(builder, loc, value1, value2); |
| |
| if (op == mlir::acc::ReductionOperator::AccEqv) |
| return genComparisonCombiner(builder, loc, mlir::arith::CmpIPredicate::eq, |
| value1, value2); |
| |
| if (op == mlir::acc::ReductionOperator::AccNeqv) |
| return genComparisonCombiner(builder, loc, mlir::arith::CmpIPredicate::ne, |
| value1, value2); |
| |
| TODO(loc, "reduction operator"); |
| } |
| |
| mlir::acc::ReductionRecipeOp Fortran::lower::createOrGetReductionRecipe( |
| fir::FirOpBuilder &builder, llvm::StringRef recipeName, mlir::Location loc, |
| mlir::Type ty, mlir::acc::ReductionOperator op, |
| llvm::SmallVector<mlir::Value> &bounds) { |
| mlir::ModuleOp mod = |
| builder.getBlock()->getParent()->getParentOfType<mlir::ModuleOp>(); |
| if (auto recipe = mod.lookupSymbol<mlir::acc::ReductionRecipeOp>(recipeName)) |
| return recipe; |
| |
| mlir::OpBuilder::InsertionGuard guard(builder); |
| auto recipe = genRecipeOp<mlir::acc::ReductionRecipeOp>( |
| builder, mod, recipeName, loc, ty, op); |
| bool allConstantBound = fir::acc::areAllBoundsConstant(bounds); |
| |
| auto [dest, src] = genRecipeCombinerOrCopyRegion( |
| builder, loc, ty, recipe.getCombinerRegion(), bounds, allConstantBound); |
| // Generate loops that combine and assign the inputs into dest (or array |
| // section of the inputs when there are bounds). |
| hlfir::Entity srcSection = src; |
| hlfir::Entity destSection = dest; |
| if (!bounds.empty()) |
| std::tie(srcSection, destSection) = genArraySectionsInRecipe( |
| builder, loc, bounds, recipe.getCombinerRegion().getArguments(), |
| allConstantBound, srcSection, destSection); |
| |
| mlir::Type elementType = fir::getFortranElementType(ty); |
| auto genKernel = [&](mlir::Location l, fir::FirOpBuilder &b, |
| hlfir::Entity srcElementValue, |
| hlfir::Entity destElementValue) -> hlfir::Entity { |
| return hlfir::Entity{genScalarCombiner(builder, loc, op, elementType, |
| srcElementValue, destElementValue)}; |
| }; |
| hlfir::genNoAliasAssignment(loc, builder, srcSection, destSection, |
| /*emitWorkshareLoop=*/false, |
| /*temporaryLHS=*/false, genKernel); |
| mlir::acc::YieldOp::create(builder, loc, dest); |
| return recipe; |
| } |
| |
| static bool isSupportedReductionType(mlir::Type ty) { |
| ty = fir::unwrapRefType(ty); |
| if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(ty)) |
| return isSupportedReductionType(boxTy.getEleTy()); |
| if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(ty)) |
| return isSupportedReductionType(seqTy.getEleTy()); |
| if (auto heapTy = mlir::dyn_cast<fir::HeapType>(ty)) |
| return isSupportedReductionType(heapTy.getEleTy()); |
| if (auto ptrTy = mlir::dyn_cast<fir::PointerType>(ty)) |
| return isSupportedReductionType(ptrTy.getEleTy()); |
| return fir::isa_trivial(ty); |
| } |
| |
| static void |
| genReductions(const Fortran::parser::AccObjectListWithReduction &objectList, |
| Fortran::lower::AbstractConverter &converter, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::StatementContext &stmtCtx, |
| llvm::SmallVectorImpl<mlir::Value> &reductionOperands, |
| llvm::ArrayRef<mlir::Value> async, |
| llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes, |
| llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes, |
| AccDataMap *dataMap = nullptr) { |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| const auto &objects = std::get<Fortran::parser::AccObjectList>(objectList.t); |
| const auto &op = std::get<Fortran::parser::ReductionOperator>(objectList.t); |
| mlir::acc::ReductionOperator mlirOp = getReductionOperator(op); |
| Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext}; |
| for (const auto &accObject : objects.v) { |
| llvm::SmallVector<mlir::Value> bounds; |
| std::stringstream asFortran; |
| mlir::Location operandLocation = genOperandLocation(converter, accObject); |
| Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject); |
| Fortran::semantics::MaybeExpr designator = Fortran::common::visit( |
| [&](auto &&s) { return ea.Analyze(s); }, accObject.u); |
| bool isWholeSymbol = |
| !designator || Fortran::evaluate::UnwrapWholeSymbolDataRef(*designator); |
| fir::factory::AddrAndBoundsInfo info = |
| Fortran::lower::gatherDataOperandAddrAndBounds< |
| mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>( |
| converter, builder, semanticsContext, stmtCtx, symbol, designator, |
| operandLocation, asFortran, bounds, |
| /*treatIndexAsSection=*/true, /*unwrapFirBox=*/false, |
| /*genDefaultBounds=*/generateDefaultBounds, |
| /*strideIncludeLowerExtent=*/strideIncludeLowerExtent, |
| /*loadAllocatableAndPointerComponent=*/false); |
| LLVM_DEBUG(llvm::dbgs() << __func__ << "\n"; info.dump(llvm::dbgs())); |
| |
| mlir::Type reductionTy = fir::unwrapRefType(info.addr.getType()); |
| if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(reductionTy)) |
| reductionTy = seqTy.getEleTy(); |
| |
| if (!isSupportedReductionType(reductionTy)) |
| TODO(operandLocation, "reduction with unsupported type"); |
| |
| if (designator) { |
| Fortran::semantics::SomeExpr someExpr = *designator; |
| if (Fortran::lower::detail::getRef<Fortran::evaluate::Component>( |
| someExpr)) { |
| TODO(operandLocation, |
| "OpenACC reduction with component reference not yet supported"); |
| } |
| } |
| |
| auto op = createDataEntryOp<mlir::acc::ReductionOp>( |
| builder, operandLocation, info.addr, asFortran, bounds, |
| /*structured=*/true, /*implicit=*/false, |
| mlir::acc::DataClause::acc_reduction, info.addr.getType(), async, |
| asyncDeviceTypes, asyncOnlyDeviceTypes, /*unwrapBoxAddr=*/true); |
| mlir::Type ty = op.getAccVar().getType(); |
| if (!fir::acc::areAllBoundsConstant(bounds) || |
| fir::isAssumedShape(info.addr.getType()) || |
| fir::isAllocatableOrPointerArray(info.addr.getType())) |
| ty = info.addr.getType(); |
| std::string recipeName = fir::acc::getRecipeName( |
| mlir::acc::RecipeKind::reduction_recipe, ty, info.addr, bounds, mlirOp); |
| |
| mlir::acc::ReductionRecipeOp recipe = |
| Fortran::lower::createOrGetReductionRecipe( |
| builder, recipeName, operandLocation, ty, mlirOp, bounds); |
| op.setRecipeAttr( |
| mlir::SymbolRefAttr::get(builder.getContext(), recipe.getSymName())); |
| reductionOperands.push_back(op.getAccVar()); |
| // Track the symbol and its corresponding mlir::Value if requested so that |
| // accesses inside the compute/loop regions use the acc.reduction variable. |
| if (dataMap && isWholeSymbol) |
| dataMap->emplaceSymbol(op.getAccVar(), |
| Fortran::semantics::SymbolRef(symbol)); |
| } |
| } |
| |
| template <typename Op, typename Terminator> |
| static Op |
| createRegionOp(fir::FirOpBuilder &builder, mlir::Location loc, |
| mlir::Location returnLoc, Fortran::lower::pft::Evaluation &eval, |
| const llvm::SmallVectorImpl<mlir::Value> &operands, |
| const llvm::SmallVectorImpl<int32_t> &operandSegments, |
| bool outerCombined = false, |
| llvm::SmallVector<mlir::Type> retTy = {}, |
| mlir::Value yieldValue = {}, mlir::TypeRange argsTy = {}, |
| llvm::SmallVector<mlir::Location> locs = {}) { |
| Op op = Op::create(builder, loc, retTy, operands); |
| builder.createBlock(&op.getRegion(), op.getRegion().end(), argsTy, locs); |
| mlir::Block &block = op.getRegion().back(); |
| builder.setInsertionPointToStart(&block); |
| |
| op->setAttr(Op::getOperandSegmentSizeAttr(), |
| builder.getDenseI32ArrayAttr(operandSegments)); |
| |
| // Place the insertion point to the start of the first block. |
| builder.setInsertionPointToStart(&block); |
| |
| // If it is an unstructured region and is not the outer region of a combined |
| // construct, create empty blocks for all evaluations. |
| if (eval.lowerAsUnstructured() && !outerCombined) |
| Fortran::lower::createEmptyRegionBlocks<mlir::acc::TerminatorOp, |
| mlir::acc::YieldOp>( |
| builder, eval.getNestedEvaluations()); |
| |
| if (yieldValue) { |
| if constexpr (std::is_same_v<Terminator, mlir::acc::YieldOp>) { |
| Terminator yieldOp = Terminator::create(builder, returnLoc, yieldValue); |
| yieldValue.getDefiningOp()->moveBefore(yieldOp); |
| } else { |
| Terminator::create(builder, returnLoc); |
| } |
| } else { |
| Terminator::create(builder, returnLoc); |
| } |
| builder.setInsertionPointToStart(&block); |
| return op; |
| } |
| |
| static void genAsyncClause(Fortran::lower::AbstractConverter &converter, |
| const Fortran::parser::AccClause::Async *asyncClause, |
| mlir::Value &async, bool &addAsyncAttr, |
| Fortran::lower::StatementContext &stmtCtx) { |
| const auto &asyncClauseValue = asyncClause->v; |
| if (asyncClauseValue) { // async has a value. |
| async = fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(*asyncClauseValue), stmtCtx)); |
| } else { |
| addAsyncAttr = true; |
| } |
| } |
| |
| static void |
| genAsyncClause(Fortran::lower::AbstractConverter &converter, |
| const Fortran::parser::AccClause::Async *asyncClause, |
| llvm::SmallVector<mlir::Value> &async, |
| llvm::SmallVector<mlir::Attribute> &asyncDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &asyncOnlyDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &deviceTypeAttrs, |
| Fortran::lower::StatementContext &stmtCtx) { |
| const auto &asyncClauseValue = asyncClause->v; |
| if (asyncClauseValue) { // async has a value. |
| mlir::Value asyncValue = fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(*asyncClauseValue), stmtCtx)); |
| for (auto deviceTypeAttr : deviceTypeAttrs) { |
| async.push_back(asyncValue); |
| asyncDeviceTypes.push_back(deviceTypeAttr); |
| } |
| } else { |
| for (auto deviceTypeAttr : deviceTypeAttrs) |
| asyncOnlyDeviceTypes.push_back(deviceTypeAttr); |
| } |
| } |
| |
| static mlir::acc::DeviceType |
| getDeviceType(Fortran::common::OpenACCDeviceType device) { |
| switch (device) { |
| case Fortran::common::OpenACCDeviceType::Star: |
| return mlir::acc::DeviceType::Star; |
| case Fortran::common::OpenACCDeviceType::Default: |
| return mlir::acc::DeviceType::Default; |
| case Fortran::common::OpenACCDeviceType::Nvidia: |
| return mlir::acc::DeviceType::Nvidia; |
| case Fortran::common::OpenACCDeviceType::Radeon: |
| return mlir::acc::DeviceType::Radeon; |
| case Fortran::common::OpenACCDeviceType::Host: |
| return mlir::acc::DeviceType::Host; |
| case Fortran::common::OpenACCDeviceType::Multicore: |
| return mlir::acc::DeviceType::Multicore; |
| case Fortran::common::OpenACCDeviceType::None: |
| return mlir::acc::DeviceType::None; |
| } |
| return mlir::acc::DeviceType::None; |
| } |
| |
| static void gatherDeviceTypeAttrs( |
| fir::FirOpBuilder &builder, |
| const Fortran::parser::AccClause::DeviceType *deviceTypeClause, |
| llvm::SmallVector<mlir::Attribute> &deviceTypes) { |
| const Fortran::parser::AccDeviceTypeExprList &deviceTypeExprList = |
| deviceTypeClause->v; |
| for (const auto &deviceTypeExpr : deviceTypeExprList.v) |
| deviceTypes.push_back(mlir::acc::DeviceTypeAttr::get( |
| builder.getContext(), getDeviceType(deviceTypeExpr.v))); |
| } |
| |
| static void genIfClause(Fortran::lower::AbstractConverter &converter, |
| mlir::Location clauseLocation, |
| const Fortran::parser::AccClause::If *ifClause, |
| mlir::Value &ifCond, |
| Fortran::lower::StatementContext &stmtCtx) { |
| fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
| mlir::Value cond = fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(ifClause->v), stmtCtx, &clauseLocation)); |
| ifCond = firOpBuilder.createConvert(clauseLocation, firOpBuilder.getI1Type(), |
| cond); |
| } |
| |
| static void genWaitClause(Fortran::lower::AbstractConverter &converter, |
| const Fortran::parser::AccClause::Wait *waitClause, |
| llvm::SmallVectorImpl<mlir::Value> &operands, |
| mlir::Value &waitDevnum, bool &addWaitAttr, |
| Fortran::lower::StatementContext &stmtCtx) { |
| const auto &waitClauseValue = waitClause->v; |
| if (waitClauseValue) { // wait has a value. |
| const Fortran::parser::AccWaitArgument &waitArg = *waitClauseValue; |
| const auto &waitList = |
| std::get<std::list<Fortran::parser::ScalarIntExpr>>(waitArg.t); |
| for (const Fortran::parser::ScalarIntExpr &value : waitList) { |
| mlir::Value v = fir::getBase( |
| converter.genExprValue(*Fortran::semantics::GetExpr(value), stmtCtx)); |
| operands.push_back(v); |
| } |
| |
| const auto &waitDevnumValue = |
| std::get<std::optional<Fortran::parser::ScalarIntExpr>>(waitArg.t); |
| if (waitDevnumValue) |
| waitDevnum = fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(*waitDevnumValue), stmtCtx)); |
| } else { |
| addWaitAttr = true; |
| } |
| } |
| |
| static void genWaitClauseWithDeviceType( |
| Fortran::lower::AbstractConverter &converter, |
| const Fortran::parser::AccClause::Wait *waitClause, |
| llvm::SmallVector<mlir::Value> &waitOperands, |
| llvm::SmallVector<mlir::Attribute> &waitOperandsDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &waitOnlyDeviceTypes, |
| llvm::SmallVector<bool> &hasDevnums, |
| llvm::SmallVector<int32_t> &waitOperandsSegments, |
| llvm::SmallVector<mlir::Attribute> deviceTypeAttrs, |
| Fortran::lower::StatementContext &stmtCtx) { |
| const auto &waitClauseValue = waitClause->v; |
| if (waitClauseValue) { // wait has a value. |
| llvm::SmallVector<mlir::Value> waitValues; |
| |
| const Fortran::parser::AccWaitArgument &waitArg = *waitClauseValue; |
| const auto &waitDevnumValue = |
| std::get<std::optional<Fortran::parser::ScalarIntExpr>>(waitArg.t); |
| bool hasDevnum = false; |
| if (waitDevnumValue) { |
| waitValues.push_back(fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(*waitDevnumValue), stmtCtx))); |
| hasDevnum = true; |
| } |
| |
| const auto &waitList = |
| std::get<std::list<Fortran::parser::ScalarIntExpr>>(waitArg.t); |
| for (const Fortran::parser::ScalarIntExpr &value : waitList) { |
| waitValues.push_back(fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(value), stmtCtx))); |
| } |
| |
| for (auto deviceTypeAttr : deviceTypeAttrs) { |
| for (auto value : waitValues) |
| waitOperands.push_back(value); |
| waitOperandsDeviceTypes.push_back(deviceTypeAttr); |
| waitOperandsSegments.push_back(waitValues.size()); |
| hasDevnums.push_back(hasDevnum); |
| } |
| } else { |
| for (auto deviceTypeAttr : deviceTypeAttrs) |
| waitOnlyDeviceTypes.push_back(deviceTypeAttr); |
| } |
| } |
| |
| mlir::Type getTypeFromIvTypeSize(fir::FirOpBuilder &builder, |
| const Fortran::semantics::Symbol &ivSym) { |
| std::size_t ivTypeSize = ivSym.size(); |
| if (ivTypeSize == 0) |
| llvm::report_fatal_error("unexpected induction variable size"); |
| // ivTypeSize is in bytes and IntegerType needs to be in bits. |
| return builder.getIntegerType(ivTypeSize * 8); |
| } |
| |
| static void privatizeIv( |
| Fortran::lower::AbstractConverter &converter, |
| const Fortran::semantics::Symbol &sym, mlir::Location loc, |
| llvm::SmallVector<mlir::Type> &ivTypes, |
| llvm::SmallVector<mlir::Location> &ivLocs, |
| llvm::SmallVector<mlir::Value> &privateOperands, |
| llvm::SmallVectorImpl<std::pair<mlir::Value, Fortran::semantics::SymbolRef>> |
| &ivPrivate, |
| bool isDoConcurrent = false) { |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| |
| mlir::Type ivTy = getTypeFromIvTypeSize(builder, sym); |
| ivTypes.push_back(ivTy); |
| ivLocs.push_back(loc); |
| mlir::Value ivValue = converter.getSymbolAddress(sym); |
| if (!ivValue && isDoConcurrent) { |
| // DO CONCURRENT induction variables are not mapped yet since they are local |
| // to the DO CONCURRENT scope. |
| mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint(); |
| builder.setInsertionPointToStart(builder.getAllocaBlock()); |
| ivValue = builder.createTemporaryAlloc(loc, ivTy, toStringRef(sym.name())); |
| builder.restoreInsertionPoint(insPt); |
| } |
| |
| mlir::Operation *privateOp = nullptr; |
| for (auto privateVal : privateOperands) { |
| if (mlir::acc::getVar(privateVal.getDefiningOp()) == ivValue) { |
| privateOp = privateVal.getDefiningOp(); |
| break; |
| } |
| } |
| |
| if (privateOp == nullptr) { |
| llvm::SmallVector<mlir::Value> noBounds; |
| mlir::SymbolRefAttr recipe = createOrGetRecipe( |
| builder, loc, mlir::acc::RecipeKind::private_recipe, ivValue, noBounds); |
| |
| std::stringstream asFortran; |
| asFortran << Fortran::lower::mangle::demangleName(toStringRef(sym.name())); |
| auto op = createDataEntryOp<mlir::acc::PrivateOp>( |
| builder, loc, ivValue, asFortran, {}, true, |
| /*implicit=*/true, mlir::acc::DataClause::acc_private, |
| ivValue.getType(), |
| /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{}); |
| op.setRecipeAttr(recipe); |
| privateOp = op.getOperation(); |
| |
| privateOperands.push_back(op.getAccVar()); |
| } |
| |
| ivPrivate.emplace_back(mlir::acc::getAccVar(privateOp), |
| Fortran::semantics::SymbolRef(sym)); |
| } |
| |
| static void determineDefaultLoopParMode( |
| Fortran::lower::AbstractConverter &converter, mlir::acc::LoopOp &loopOp, |
| llvm::SmallVector<mlir::Attribute> &seqDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &independentDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &autoDeviceTypes) { |
| auto hasDeviceNone = [](mlir::Attribute attr) -> bool { |
| return mlir::dyn_cast<mlir::acc::DeviceTypeAttr>(attr).getValue() == |
| mlir::acc::DeviceType::None; |
| }; |
| bool hasDefaultSeq = llvm::any_of(seqDeviceTypes, hasDeviceNone); |
| bool hasDefaultIndependent = |
| llvm::any_of(independentDeviceTypes, hasDeviceNone); |
| bool hasDefaultAuto = llvm::any_of(autoDeviceTypes, hasDeviceNone); |
| if (hasDefaultSeq || hasDefaultIndependent || hasDefaultAuto) |
| return; // Default loop par mode is already specified. |
| |
| mlir::Region *currentRegion = |
| converter.getFirOpBuilder().getBlock()->getParent(); |
| mlir::Operation *parentOp = mlir::acc::getEnclosingComputeOp(*currentRegion); |
| const bool isOrphanedLoop = !parentOp; |
| if (isOrphanedLoop || |
| mlir::isa_and_present<mlir::acc::ParallelOp>(parentOp)) { |
| // As per OpenACC 3.3 standard section 2.9.6 independent clause: |
| // A loop construct with no auto or seq clause is treated as if it has the |
| // independent clause when it is an orphaned loop construct or its parent |
| // compute construct is a parallel construct. |
| independentDeviceTypes.push_back(mlir::acc::DeviceTypeAttr::get( |
| converter.getFirOpBuilder().getContext(), mlir::acc::DeviceType::None)); |
| } else if (mlir::isa_and_present<mlir::acc::SerialOp>(parentOp)) { |
| // Serial construct implies `seq` clause on loop. However, this |
| // conflicts with parallelism assignment if already set. Therefore check |
| // that first. |
| bool hasDefaultGangWorkerOrVector = |
| loopOp.hasVector() || loopOp.getVectorValue() || loopOp.hasWorker() || |
| loopOp.getWorkerValue() || loopOp.hasGang() || |
| loopOp.getGangValue(mlir::acc::GangArgType::Num) || |
| loopOp.getGangValue(mlir::acc::GangArgType::Dim) || |
| loopOp.getGangValue(mlir::acc::GangArgType::Static); |
| if (!hasDefaultGangWorkerOrVector) |
| seqDeviceTypes.push_back(mlir::acc::DeviceTypeAttr::get( |
| converter.getFirOpBuilder().getContext(), |
| mlir::acc::DeviceType::None)); |
| // Since the loop has some parallelism assigned - we cannot assign `seq`. |
| // However, the `acc.loop` verifier will check that one of seq, independent, |
| // or auto is marked. Seems reasonable to mark as auto since the OpenACC |
| // spec does say "If not, or if it is unable to make a determination, it |
| // must treat the auto clause as if it is a seq clause, and it must |
| // ignore any gang, worker, or vector clauses on the loop construct" |
| else |
| autoDeviceTypes.push_back(mlir::acc::DeviceTypeAttr::get( |
| converter.getFirOpBuilder().getContext(), |
| mlir::acc::DeviceType::None)); |
| } else { |
| // As per OpenACC 3.3 standard section 2.9.7 auto clause: |
| // When the parent compute construct is a kernels construct, a loop |
| // construct with no independent or seq clause is treated as if it has the |
| // auto clause. |
| assert(mlir::isa_and_present<mlir::acc::KernelsOp>(parentOp) && |
| "Expected kernels construct"); |
| autoDeviceTypes.push_back(mlir::acc::DeviceTypeAttr::get( |
| converter.getFirOpBuilder().getContext(), mlir::acc::DeviceType::None)); |
| } |
| } |
| |
| // Helper to visit Bounds of DO LOOP nest. |
| static void visitLoopControl( |
| Fortran::lower::AbstractConverter &converter, |
| const Fortran::parser::DoConstruct &outerDoConstruct, |
| uint64_t loopsToProcess, Fortran::lower::pft::Evaluation &eval, |
| std::function<void(const Fortran::parser::LoopControl::Bounds &, |
| mlir::Location)> |
| callback) { |
| Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation(); |
| for (uint64_t i = 0; i < loopsToProcess; ++i) { |
| const Fortran::parser::LoopControl *loopControl; |
| if (i == 0) { |
| loopControl = &*outerDoConstruct.GetLoopControl(); |
| mlir::Location loc = converter.genLocation( |
| Fortran::parser::FindSourceLocation(outerDoConstruct)); |
| callback(std::get<Fortran::parser::LoopControl::Bounds>(loopControl->u), |
| loc); |
| } else { |
| // Safely locate the next inner DoConstruct within this eval. |
| const Fortran::parser::DoConstruct *innerDo = nullptr; |
| if (crtEval && crtEval->hasNestedEvaluations()) { |
| for (Fortran::lower::pft::Evaluation &child : |
| crtEval->getNestedEvaluations()) { |
| if (auto *stmt = child.getIf<Fortran::parser::DoConstruct>()) { |
| innerDo = stmt; |
| // Prepare to descend for the next iteration |
| crtEval = &child; |
| break; |
| } |
| } |
| } |
| if (!innerDo) |
| break; // No deeper loop; stop collecting collapsed bounds. |
| |
| loopControl = &*innerDo->GetLoopControl(); |
| mlir::Location loc = |
| converter.genLocation(Fortran::parser::FindSourceLocation(*innerDo)); |
| callback(std::get<Fortran::parser::LoopControl::Bounds>(loopControl->u), |
| loc); |
| } |
| } |
| } |
| |
| // Extract loop bounds, steps, induction variables, and privatization info |
| // for both DO CONCURRENT and regular do loops |
| static void processDoLoopBounds( |
| Fortran::lower::AbstractConverter &converter, |
| mlir::Location currentLocation, Fortran::lower::StatementContext &stmtCtx, |
| fir::FirOpBuilder &builder, |
| const Fortran::parser::DoConstruct &outerDoConstruct, |
| Fortran::lower::pft::Evaluation &eval, |
| llvm::SmallVector<mlir::Value> &lowerbounds, |
| llvm::SmallVector<mlir::Value> &upperbounds, |
| llvm::SmallVector<mlir::Value> &steps, |
| llvm::SmallVector<mlir::Value> &privateOperands, |
| llvm::SmallVectorImpl<std::pair<mlir::Value, Fortran::semantics::SymbolRef>> |
| &ivPrivate, |
| llvm::SmallVector<mlir::Type> &ivTypes, |
| llvm::SmallVector<mlir::Location> &ivLocs, |
| llvm::SmallVector<bool> &inclusiveBounds, |
| llvm::SmallVector<mlir::Location> &locs, uint64_t loopsToProcess) { |
| assert(loopsToProcess > 0 && "expect at least one loop"); |
| locs.push_back(currentLocation); // Location of the directive |
| bool isDoConcurrent = outerDoConstruct.IsDoConcurrent(); |
| |
| if (isDoConcurrent) { |
| locs.push_back(converter.genLocation( |
| Fortran::parser::FindSourceLocation(outerDoConstruct))); |
| const Fortran::parser::LoopControl *loopControl = |
| &*outerDoConstruct.GetLoopControl(); |
| const auto &concurrent = |
| std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u); |
| if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t) |
| .empty()) |
| TODO(currentLocation, "DO CONCURRENT with locality spec inside ACC"); |
| |
| const auto &concurrentHeader = |
| std::get<Fortran::parser::ConcurrentHeader>(concurrent.t); |
| const auto &controls = |
| std::get<std::list<Fortran::parser::ConcurrentControl>>( |
| concurrentHeader.t); |
| for (const auto &control : controls) { |
| lowerbounds.push_back(fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx))); |
| upperbounds.push_back(fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(std::get<2>(control.t)), stmtCtx))); |
| if (const auto &expr = |
| std::get<std::optional<Fortran::parser::ScalarIntExpr>>( |
| control.t)) |
| steps.push_back(fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(*expr), stmtCtx))); |
| else // If `step` is not present, assume it is `1`. |
| steps.push_back(builder.createIntegerConstant( |
| currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1)); |
| |
| const auto &name = std::get<Fortran::parser::Name>(control.t); |
| privatizeIv(converter, *name.symbol, currentLocation, ivTypes, ivLocs, |
| privateOperands, ivPrivate, isDoConcurrent); |
| |
| inclusiveBounds.push_back(true); |
| } |
| } else { |
| visitLoopControl( |
| converter, outerDoConstruct, loopsToProcess, eval, |
| [&](const Fortran::parser::LoopControl::Bounds &bounds, |
| mlir::Location loc) { |
| locs.push_back(loc); |
| lowerbounds.push_back(fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(bounds.lower), stmtCtx))); |
| upperbounds.push_back(fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(bounds.upper), stmtCtx))); |
| if (bounds.step) |
| steps.push_back(fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(bounds.step), stmtCtx))); |
| else // If `step` is not present, assume it is `1`. |
| steps.push_back(builder.createIntegerConstant( |
| currentLocation, upperbounds[upperbounds.size() - 1].getType(), |
| 1)); |
| Fortran::semantics::Symbol &ivSym = |
| bounds.name.thing.symbol->GetUltimate(); |
| privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs, |
| privateOperands, ivPrivate); |
| |
| inclusiveBounds.push_back(true); |
| }); |
| } |
| } |
| |
| static void remapCommonBlockMember( |
| Fortran::lower::AbstractConverter &converter, mlir::Location loc, |
| const Fortran::semantics::Symbol &member, |
| mlir::Value newCommonBlockBaseAddress, |
| const Fortran::semantics::Symbol &commonBlockSymbol, |
| llvm::SmallPtrSetImpl<const Fortran::semantics::Symbol *> &seenSymbols) { |
| if (seenSymbols.contains(&member)) |
| return; |
| mlir::Value accMemberValue = Fortran::lower::genCommonBlockMember( |
| converter, loc, member, newCommonBlockBaseAddress, |
| commonBlockSymbol.size()); |
| fir::ExtendedValue hostExv = converter.getSymbolExtendedValue(member); |
| fir::ExtendedValue accExv = fir::substBase(hostExv, accMemberValue); |
| converter.bindSymbol(member, accExv); |
| seenSymbols.insert(&member); |
| } |
| |
| void AccDataMap::remapDataOperandSymbols( |
| Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &builder, |
| mlir::Region ®ion) const { |
| if (!enableSymbolRemapping || empty()) |
| return; |
| |
| // Map Symbols that appeared inside data clauses to a new hlfir.declare whose |
| // input is the acc data operation result. |
| // This allows isolating all the symbol accesses inside the compute region |
| // from accesses in the host and other regions while preserving the Fortran |
| // information about the symbols for Fortran specific optimizations inside the |
| // region. |
| Fortran::lower::SymMap &symbolMap = converter.getSymbolMap(); |
| mlir::OpBuilder::InsertionGuard insertGuard(builder); |
| builder.setInsertionPointToStart(®ion.front()); |
| llvm::SmallPtrSet<const Fortran::semantics::Symbol *, 8> seenSymbols; |
| mlir::IRMapping mapper; |
| for (auto [value, symbol] : symbols) { |
| // If a symbol appears on several data clause, just map it to the first |
| // result (all data operations results for a symbol are pointing same |
| // memory, so it does not matter which one is used). |
| if (seenSymbols.contains(&symbol.get())) |
| continue; |
| seenSymbols.insert(&symbol.get()); |
| mlir::Location loc = value.getLoc(); |
| // When a common block appears in a directive, remap its members. |
| // Note: this will instantiate all common block members even if they are not |
| // used inside the region. If hlfir.declare DCE is not made possible, this |
| // could be improved to reduce IR noise. |
| if (const auto *commonBlock = symbol->template detailsIf< |
| Fortran::semantics::CommonBlockDetails>()) { |
| const Fortran::semantics::Scope &commonScope = symbol->owner(); |
| if (commonScope.equivalenceSets().empty()) { |
| for (auto member : commonBlock->objects()) |
| remapCommonBlockMember(converter, loc, *member, value, *symbol, |
| seenSymbols); |
| } else { |
| // Objects equivalenced with common block members still belong to the |
| // common block storage even if they are not part of the common block |
| // declaration. The easiest and most robust way to find all symbols |
| // belonging to the common block is to loop through the scope symbols |
| // and check if they belong to the common. |
| for (const auto &scopeSymbol : commonScope) |
| if (Fortran::semantics::FindCommonBlockContaining( |
| *scopeSymbol.second) == &symbol.get()) |
| remapCommonBlockMember(converter, loc, *scopeSymbol.second, value, |
| *symbol, seenSymbols); |
| } |
| continue; |
| } |
| std::optional<fir::FortranVariableOpInterface> hostDef = |
| symbolMap.lookupVariableDefinition(symbol); |
| assert(hostDef.has_value() && llvm::isa<hlfir::DeclareOp>(*hostDef) && |
| "expected symbol to be mapped to hlfir.declare"); |
| auto hostDeclare = llvm::cast<hlfir::DeclareOp>(*hostDef); |
| // Replace base input and DummyScope inputs. |
| mlir::Value hostInput = hostDeclare.getMemref(); |
| mlir::Type hostType = hostInput.getType(); |
| mlir::Type computeType = value.getType(); |
| if (hostType == computeType) { |
| mapper.map(hostInput, value); |
| } else if (llvm::isa<fir::BaseBoxType>(computeType)) { |
| assert(!llvm::isa<fir::BaseBoxType>(hostType) && |
| "box type mismatch between compute region variable and " |
| "hlfir.declare input unexpected"); |
| if (Fortran::semantics::IsOptional(symbol)) |
| TODO(loc, "remapping OPTIONAL symbol in OpenACC compute region"); |
| auto rawValue = fir::BoxAddrOp::create(builder, loc, hostType, value); |
| mapper.map(hostInput, rawValue); |
| } else { |
| assert(!llvm::isa<fir::BaseBoxType>(hostType) && |
| "compute region variable should not be raw address when host " |
| "hlfir.declare input was a box"); |
| assert(fir::isBoxAddress(hostType) == fir::isBoxAddress(computeType) && |
| "compute region variable should be a pointer/allocatable if and " |
| "only if host is"); |
| assert(fir::isa_ref_type(hostType) && fir::isa_ref_type(computeType) && |
| "compute region variable and host variable should both be raw " |
| "addresses"); |
| mlir::Value cast = builder.createConvert(loc, hostType, value); |
| mapper.map(hostInput, cast); |
| } |
| if (mlir::Value dummyScope = hostDeclare.getDummyScope()) { |
| // Copy the dummy scope into the region so that aliasing rules about |
| // Fortran dummies are understood inside the region and the abstract dummy |
| // scope type does not have to cross the OpenACC compute region boundary. |
| if (!mapper.contains(dummyScope)) { |
| mlir::Operation *hostDummyScopeOp = dummyScope.getDefiningOp(); |
| assert(hostDummyScopeOp && |
| "dummyScope defining operation must be visible in lowering"); |
| (void)builder.clone(*hostDummyScopeOp, mapper); |
| } |
| } |
| |
| mlir::Operation *computeDef = |
| builder.clone(*hostDeclare.getOperation(), mapper); |
| |
| // The input box already went through an hlfir.declare. It has the correct |
| // local lower bounds and attribute. Do not generate a new fir.rebox. |
| if (llvm::isa<fir::BaseBoxType>(hostDeclare.getMemref().getType())) |
| llvm::cast<hlfir::DeclareOp>(*computeDef).setSkipRebox(true); |
| |
| symbolMap.addVariableDefinition( |
| symbol, llvm::cast<fir::FortranVariableOpInterface>(computeDef)); |
| } |
| |
| for (const auto &comp : components) { |
| mlir::Location loc = comp.accValue.getLoc(); |
| hlfir::DesignateOp designate = |
| comp.designate.getDefiningOp<hlfir::DesignateOp>(); |
| // If this is not a designate, it means the component was already remap in a |
| // parent construct, and the declare should be cloned instead. |
| if (!designate) |
| TODO(comp.designate.getLoc(), |
| "nested component reference in OpenACC clause"); |
| |
| auto declare = hlfir::DeclareOp::create( |
| builder, loc, comp.accValue, mlir::acc::getVariableName(comp.accValue), |
| designate.getShape(), designate.getTypeparams(), /*dummyScope=*/{}, |
| /*storage=*/{}, |
| /*storageOffset=*/0, designate.getFortranAttrsAttr()); |
| symbolMap.addComponentOverride(comp.component, declare); |
| } |
| } |
| |
| static void privatizeInductionVariables( |
| Fortran::lower::AbstractConverter &converter, |
| mlir::Location currentLocation, |
| const Fortran::parser::DoConstruct &outerDoConstruct, |
| Fortran::lower::pft::Evaluation &eval, |
| llvm::SmallVector<mlir::Value> &privateOperands, |
| llvm::SmallVectorImpl<std::pair<mlir::Value, Fortran::semantics::SymbolRef>> |
| &ivPrivate, |
| llvm::SmallVector<mlir::Location> &locs, uint64_t loopsToProcess) { |
| // ivTypes and locs will be ignored since no acc.loop control arguments will |
| // be created. |
| llvm::SmallVector<mlir::Type> ivTypes; |
| llvm::SmallVector<mlir::Location> ivLocs; |
| assert(!outerDoConstruct.IsDoConcurrent() && |
| "do concurrent loops are not expected to contained earlty exits"); |
| visitLoopControl(converter, outerDoConstruct, loopsToProcess, eval, |
| [&](const Fortran::parser::LoopControl::Bounds &bounds, |
| mlir::Location loc) { |
| locs.push_back(loc); |
| Fortran::semantics::Symbol &ivSym = |
| bounds.name.thing.symbol->GetUltimate(); |
| privatizeIv(converter, ivSym, currentLocation, ivTypes, |
| ivLocs, privateOperands, ivPrivate); |
| }); |
| } |
| |
| static mlir::acc::LoopOp |
| buildACCLoopOp(Fortran::lower::AbstractConverter &converter, |
| mlir::Location currentLocation, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::StatementContext &stmtCtx, |
| const Fortran::parser::DoConstruct &outerDoConstruct, |
| Fortran::lower::pft::Evaluation &eval, |
| llvm::SmallVector<mlir::Value> &privateOperands, |
| AccDataMap &dataMap, |
| llvm::SmallVector<mlir::Value> &gangOperands, |
| llvm::SmallVector<mlir::Value> &workerNumOperands, |
| llvm::SmallVector<mlir::Value> &vectorOperands, |
| llvm::SmallVector<mlir::Value> &tileOperands, |
| llvm::SmallVector<mlir::Value> &cacheOperands, |
| llvm::SmallVector<mlir::Value> &reductionOperands, |
| llvm::SmallVector<mlir::Type> &retTy, mlir::Value yieldValue, |
| uint64_t loopsToProcess) { |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| |
| llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>> |
| ivPrivate; |
| llvm::SmallVector<mlir::Type> ivTypes; |
| llvm::SmallVector<mlir::Location> ivLocs; |
| llvm::SmallVector<bool> inclusiveBounds; |
| llvm::SmallVector<mlir::Location> locs; |
| llvm::SmallVector<mlir::Value> lowerbounds, upperbounds, steps; |
| |
| // Look at the do/do concurrent loops to extract bounds information unless |
| // this loop is lowered in an unstructured fashion, in which case bounds are |
| // not represented on acc.loop and explicit control flow is used inside body. |
| if (!eval.lowerAsUnstructured()) { |
| processDoLoopBounds(converter, currentLocation, stmtCtx, builder, |
| outerDoConstruct, eval, lowerbounds, upperbounds, steps, |
| privateOperands, ivPrivate, ivTypes, ivLocs, |
| inclusiveBounds, locs, loopsToProcess); |
| } else { |
| // When the loop contains early exits, privatize induction variables, but do |
| // not create acc.loop bounds. The control flow of the loop will be |
| // generated explicitly in the acc.loop body that is just a container. |
| privatizeInductionVariables(converter, currentLocation, outerDoConstruct, |
| eval, privateOperands, ivPrivate, locs, |
| loopsToProcess); |
| } |
| llvm::SmallVector<mlir::Value> operands; |
| llvm::SmallVector<int32_t> operandSegments; |
| addOperands(operands, operandSegments, lowerbounds); |
| addOperands(operands, operandSegments, upperbounds); |
| addOperands(operands, operandSegments, steps); |
| addOperands(operands, operandSegments, gangOperands); |
| addOperands(operands, operandSegments, workerNumOperands); |
| addOperands(operands, operandSegments, vectorOperands); |
| addOperands(operands, operandSegments, tileOperands); |
| addOperands(operands, operandSegments, cacheOperands); |
| addOperands(operands, operandSegments, privateOperands); |
| // fill empty firstprivate operands since they are not permitted |
| // from OpenACC language perspective. |
| addOperands(operands, operandSegments, {}); |
| addOperands(operands, operandSegments, reductionOperands); |
| |
| auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>( |
| builder, builder.getFusedLoc(locs), currentLocation, eval, operands, |
| operandSegments, /*outerCombined=*/false, retTy, yieldValue, ivTypes, |
| ivLocs); |
| // Ensure the iv symbol is mapped to private iv SSA value for the scope of |
| // the loop even if it did not appear explicitly in a PRIVATE clause (if it |
| // appeared explicitly in such clause, that is also fine because duplicates |
| // in the list are ignored). |
| dataMap.symbols.append(ivPrivate.begin(), ivPrivate.end()); |
| // Remap symbols from data clauses to use data operation results |
| dataMap.remapDataOperandSymbols(converter, builder, loopOp.getRegion()); |
| |
| if (!eval.lowerAsUnstructured()) { |
| for (auto [arg, iv] : |
| llvm::zip(loopOp.getLoopRegions().front()->front().getArguments(), |
| ivPrivate)) { |
| // Store block argument to the related iv private variable. |
| mlir::Value privateValue = converter.getSymbolAddress( |
| std::get<Fortran::semantics::SymbolRef>(iv)); |
| fir::StoreOp::create(builder, currentLocation, arg, privateValue); |
| } |
| loopOp.setInclusiveUpperbound(inclusiveBounds); |
| } else { |
| loopOp.setUnstructuredAttr(builder.getUnitAttr()); |
| } |
| |
| return loopOp; |
| } |
| |
| static bool hasEarlyReturn(Fortran::lower::pft::Evaluation &eval) { |
| bool hasReturnStmt = false; |
| for (auto &e : eval.getNestedEvaluations()) { |
| e.visit(Fortran::common::visitors{ |
| [&](const Fortran::parser::ReturnStmt &) { hasReturnStmt = true; }, |
| [&](const auto &s) {}, |
| }); |
| if (e.hasNestedEvaluations()) |
| hasReturnStmt = hasEarlyReturn(e); |
| } |
| return hasReturnStmt; |
| } |
| |
| static mlir::acc::LoopOp createLoopOp( |
| Fortran::lower::AbstractConverter &converter, |
| mlir::Location currentLocation, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::StatementContext &stmtCtx, |
| const Fortran::parser::DoConstruct &outerDoConstruct, |
| Fortran::lower::pft::Evaluation &eval, |
| const Fortran::parser::AccClauseList &accClauseList, |
| std::optional<mlir::acc::CombinedConstructsType> combinedConstructs = |
| std::nullopt) { |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| llvm::SmallVector<mlir::Value> tileOperands, privateOperands, |
| reductionOperands, cacheOperands, vectorOperands, workerNumOperands, |
| gangOperands; |
| llvm::SmallVector<int32_t> tileOperandsSegments, gangOperandsSegments; |
| llvm::SmallVector<int64_t> collapseValues; |
| |
| AccDataMap dataMap; |
| |
| llvm::SmallVector<mlir::Attribute> gangArgTypes; |
| llvm::SmallVector<mlir::Attribute> seqDeviceTypes, independentDeviceTypes, |
| autoDeviceTypes, vectorOperandsDeviceTypes, workerNumOperandsDeviceTypes, |
| vectorDeviceTypes, workerNumDeviceTypes, tileOperandsDeviceTypes, |
| collapseDeviceTypes, gangDeviceTypes, gangOperandsDeviceTypes; |
| |
| // device_type attribute is set to `none` until a device_type clause is |
| // encountered. |
| llvm::SmallVector<mlir::Attribute> crtDeviceTypes; |
| crtDeviceTypes.push_back(mlir::acc::DeviceTypeAttr::get( |
| builder.getContext(), mlir::acc::DeviceType::None)); |
| |
| for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
| mlir::Location clauseLocation = converter.genLocation(clause.source); |
| if (const auto *gangClause = |
| std::get_if<Fortran::parser::AccClause::Gang>(&clause.u)) { |
| if (gangClause->v) { |
| const Fortran::parser::AccGangArgList &x = *gangClause->v; |
| mlir::SmallVector<mlir::Value> gangValues; |
| mlir::SmallVector<mlir::Attribute> gangArgs; |
| for (const Fortran::parser::AccGangArg &gangArg : x.v) { |
| if (const auto *num = |
| std::get_if<Fortran::parser::AccGangArg::Num>(&gangArg.u)) { |
| gangValues.push_back(fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(num->v), stmtCtx))); |
| gangArgs.push_back(mlir::acc::GangArgTypeAttr::get( |
| builder.getContext(), mlir::acc::GangArgType::Num)); |
| } else if (const auto *staticArg = |
| std::get_if<Fortran::parser::AccGangArg::Static>( |
| &gangArg.u)) { |
| const Fortran::parser::AccSizeExpr &sizeExpr = staticArg->v; |
| if (sizeExpr.v) { |
| gangValues.push_back(fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(*sizeExpr.v), stmtCtx))); |
| } else { |
| // * was passed as value and will be represented as a special |
| // constant. |
| gangValues.push_back(builder.createIntegerConstant( |
| clauseLocation, builder.getIndexType(), starCst)); |
| } |
| gangArgs.push_back(mlir::acc::GangArgTypeAttr::get( |
| builder.getContext(), mlir::acc::GangArgType::Static)); |
| } else if (const auto *dim = |
| std::get_if<Fortran::parser::AccGangArg::Dim>( |
| &gangArg.u)) { |
| gangValues.push_back(fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(dim->v), stmtCtx))); |
| gangArgs.push_back(mlir::acc::GangArgTypeAttr::get( |
| builder.getContext(), mlir::acc::GangArgType::Dim)); |
| } |
| } |
| for (auto crtDeviceTypeAttr : crtDeviceTypes) { |
| for (const auto &pair : llvm::zip(gangValues, gangArgs)) { |
| gangOperands.push_back(std::get<0>(pair)); |
| gangArgTypes.push_back(std::get<1>(pair)); |
| } |
| gangOperandsSegments.push_back(gangValues.size()); |
| gangOperandsDeviceTypes.push_back(crtDeviceTypeAttr); |
| } |
| } else { |
| for (auto crtDeviceTypeAttr : crtDeviceTypes) |
| gangDeviceTypes.push_back(crtDeviceTypeAttr); |
| } |
| } else if (const auto *workerClause = |
| std::get_if<Fortran::parser::AccClause::Worker>(&clause.u)) { |
| if (workerClause->v) { |
| mlir::Value workerNumValue = fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(*workerClause->v), stmtCtx)); |
| for (auto crtDeviceTypeAttr : crtDeviceTypes) { |
| workerNumOperands.push_back(workerNumValue); |
| workerNumOperandsDeviceTypes.push_back(crtDeviceTypeAttr); |
| } |
| } else { |
| for (auto crtDeviceTypeAttr : crtDeviceTypes) |
| workerNumDeviceTypes.push_back(crtDeviceTypeAttr); |
| } |
| } else if (const auto *vectorClause = |
| std::get_if<Fortran::parser::AccClause::Vector>(&clause.u)) { |
| if (vectorClause->v) { |
| mlir::Value vectorValue = fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(*vectorClause->v), stmtCtx)); |
| for (auto crtDeviceTypeAttr : crtDeviceTypes) { |
| vectorOperands.push_back(vectorValue); |
| vectorOperandsDeviceTypes.push_back(crtDeviceTypeAttr); |
| } |
| } else { |
| for (auto crtDeviceTypeAttr : crtDeviceTypes) |
| vectorDeviceTypes.push_back(crtDeviceTypeAttr); |
| } |
| } else if (const auto *tileClause = |
| std::get_if<Fortran::parser::AccClause::Tile>(&clause.u)) { |
| const Fortran::parser::AccTileExprList &accTileExprList = tileClause->v; |
| llvm::SmallVector<mlir::Value> tileValues; |
| for (const auto &accTileExpr : accTileExprList.v) { |
| const auto &expr = |
| std::get<std::optional<Fortran::parser::ScalarIntConstantExpr>>( |
| accTileExpr.t); |
| if (expr) { |
| tileValues.push_back(fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(*expr), stmtCtx))); |
| } else { |
| // * was passed as value and will be represented as a special |
| // constant. |
| mlir::Value tileStar = builder.createIntegerConstant( |
| clauseLocation, builder.getIntegerType(32), starCst); |
| tileValues.push_back(tileStar); |
| } |
| } |
| for (auto crtDeviceTypeAttr : crtDeviceTypes) { |
| for (auto value : tileValues) |
| tileOperands.push_back(value); |
| tileOperandsDeviceTypes.push_back(crtDeviceTypeAttr); |
| tileOperandsSegments.push_back(tileValues.size()); |
| } |
| } else if (const auto *privateClause = |
| std::get_if<Fortran::parser::AccClause::Private>( |
| &clause.u)) { |
| genDataOperandOperations<mlir::acc::PrivateOp>( |
| privateClause->v, converter, semanticsContext, stmtCtx, |
| privateOperands, mlir::acc::DataClause::acc_private, |
| /*structured=*/true, /*implicit=*/false, |
| /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{}, |
| /*setDeclareAttr=*/false, &dataMap); |
| } else if (const auto *reductionClause = |
| std::get_if<Fortran::parser::AccClause::Reduction>( |
| &clause.u)) { |
| genReductions(reductionClause->v, converter, semanticsContext, stmtCtx, |
| reductionOperands, /*async=*/{}, |
| /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{}, |
| &dataMap); |
| } else if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) { |
| for (auto crtDeviceTypeAttr : crtDeviceTypes) |
| seqDeviceTypes.push_back(crtDeviceTypeAttr); |
| } else if (std::get_if<Fortran::parser::AccClause::Independent>( |
| &clause.u)) { |
| for (auto crtDeviceTypeAttr : crtDeviceTypes) |
| independentDeviceTypes.push_back(crtDeviceTypeAttr); |
| } else if (std::get_if<Fortran::parser::AccClause::Auto>(&clause.u)) { |
| for (auto crtDeviceTypeAttr : crtDeviceTypes) |
| autoDeviceTypes.push_back(crtDeviceTypeAttr); |
| } else if (const auto *deviceTypeClause = |
| std::get_if<Fortran::parser::AccClause::DeviceType>( |
| &clause.u)) { |
| crtDeviceTypes.clear(); |
| gatherDeviceTypeAttrs(builder, deviceTypeClause, crtDeviceTypes); |
| } else if (const auto *collapseClause = |
| std::get_if<Fortran::parser::AccClause::Collapse>( |
| &clause.u)) { |
| const Fortran::parser::AccCollapseArg &arg = collapseClause->v; |
| const auto &intExpr = |
| std::get<Fortran::parser::ScalarIntConstantExpr>(arg.t); |
| const auto *expr = Fortran::semantics::GetExpr(intExpr); |
| const std::optional<int64_t> collapseValue = |
| Fortran::evaluate::ToInt64(*expr); |
| assert(collapseValue && "expect integer value for the collapse clause"); |
| |
| for (auto crtDeviceTypeAttr : crtDeviceTypes) { |
| collapseValues.push_back(*collapseValue); |
| collapseDeviceTypes.push_back(crtDeviceTypeAttr); |
| } |
| } |
| } |
| |
| llvm::SmallVector<mlir::Type> retTy; |
| mlir::Value yieldValue; |
| if (eval.lowerAsUnstructured() && hasEarlyReturn(eval)) { |
| // When there is a return statement inside the loop, add a result to the |
| // acc.loop that will be used in a conditional branch after the loop to |
| // return. |
| mlir::Type i1Ty = builder.getI1Type(); |
| yieldValue = builder.createIntegerConstant(currentLocation, i1Ty, 0); |
| retTy.push_back(i1Ty); |
| } |
| |
| uint64_t loopsToProcess = |
| Fortran::lower::getLoopCountForCollapseAndTile(accClauseList); |
| auto loopOp = buildACCLoopOp( |
| converter, currentLocation, semanticsContext, stmtCtx, outerDoConstruct, |
| eval, privateOperands, dataMap, gangOperands, workerNumOperands, |
| vectorOperands, tileOperands, cacheOperands, reductionOperands, retTy, |
| yieldValue, loopsToProcess); |
| |
| if (!gangDeviceTypes.empty()) |
| loopOp.setGangAttr(builder.getArrayAttr(gangDeviceTypes)); |
| if (!gangArgTypes.empty()) |
| loopOp.setGangOperandsArgTypeAttr(builder.getArrayAttr(gangArgTypes)); |
| if (!gangOperandsSegments.empty()) |
| loopOp.setGangOperandsSegmentsAttr( |
| builder.getDenseI32ArrayAttr(gangOperandsSegments)); |
| if (!gangOperandsDeviceTypes.empty()) |
| loopOp.setGangOperandsDeviceTypeAttr( |
| builder.getArrayAttr(gangOperandsDeviceTypes)); |
| |
| if (!workerNumDeviceTypes.empty()) |
| loopOp.setWorkerAttr(builder.getArrayAttr(workerNumDeviceTypes)); |
| if (!workerNumOperandsDeviceTypes.empty()) |
| loopOp.setWorkerNumOperandsDeviceTypeAttr( |
| builder.getArrayAttr(workerNumOperandsDeviceTypes)); |
| |
| if (!vectorDeviceTypes.empty()) |
| loopOp.setVectorAttr(builder.getArrayAttr(vectorDeviceTypes)); |
| if (!vectorOperandsDeviceTypes.empty()) |
| loopOp.setVectorOperandsDeviceTypeAttr( |
| builder.getArrayAttr(vectorOperandsDeviceTypes)); |
| |
| if (!tileOperandsDeviceTypes.empty()) |
| loopOp.setTileOperandsDeviceTypeAttr( |
| builder.getArrayAttr(tileOperandsDeviceTypes)); |
| if (!tileOperandsSegments.empty()) |
| loopOp.setTileOperandsSegmentsAttr( |
| builder.getDenseI32ArrayAttr(tileOperandsSegments)); |
| |
| // Determine the loop's default par mode - either seq, independent, or auto. |
| determineDefaultLoopParMode(converter, loopOp, seqDeviceTypes, |
| independentDeviceTypes, autoDeviceTypes); |
| if (!seqDeviceTypes.empty()) |
| loopOp.setSeqAttr(builder.getArrayAttr(seqDeviceTypes)); |
| if (!independentDeviceTypes.empty()) |
| loopOp.setIndependentAttr(builder.getArrayAttr(independentDeviceTypes)); |
| if (!autoDeviceTypes.empty()) |
| loopOp.setAuto_Attr(builder.getArrayAttr(autoDeviceTypes)); |
| |
| if (!collapseValues.empty()) |
| loopOp.setCollapseAttr(builder.getI64ArrayAttr(collapseValues)); |
| if (!collapseDeviceTypes.empty()) |
| loopOp.setCollapseDeviceTypeAttr(builder.getArrayAttr(collapseDeviceTypes)); |
| |
| if (combinedConstructs) |
| loopOp.setCombinedAttr(mlir::acc::CombinedConstructsTypeAttr::get( |
| builder.getContext(), *combinedConstructs)); |
| |
| // TODO: retrieve directives from NonLabelDoStmt pft::Evaluation, and add them |
| // as attribute to the acc.loop as an extra attribute. It is not quite clear |
| // how useful these $dir are in acc contexts, but they could still provide |
| // more information about the loop acc codegen. They can be obtained by |
| // looking for the first lexicalSuccessor of eval that is a NonLabelDoStmt, |
| // and using the related `dirs` member. |
| |
| return loopOp; |
| } |
| |
| static mlir::Value |
| genACC(Fortran::lower::AbstractConverter &converter, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::pft::Evaluation &eval, |
| const Fortran::parser::OpenACCLoopConstruct &loopConstruct) { |
| |
| const auto &beginLoopDirective = |
| std::get<Fortran::parser::AccBeginLoopDirective>(loopConstruct.t); |
| const auto &loopDirective = |
| std::get<Fortran::parser::AccLoopDirective>(beginLoopDirective.t); |
| |
| mlir::Location currentLocation = |
| converter.genLocation(beginLoopDirective.source); |
| Fortran::lower::StatementContext stmtCtx; |
| |
| assert(loopDirective.v == llvm::acc::ACCD_loop && |
| "Unsupported OpenACC loop construct"); |
| (void)loopDirective; |
| |
| const auto &accClauseList = |
| std::get<Fortran::parser::AccClauseList>(beginLoopDirective.t); |
| const auto &outerDoConstruct = |
| std::get<std::optional<Fortran::parser::DoConstruct>>(loopConstruct.t); |
| auto loopOp = createLoopOp(converter, currentLocation, semanticsContext, |
| stmtCtx, *outerDoConstruct, eval, accClauseList, |
| /*combinedConstructs=*/{}); |
| if (loopOp.getNumResults() == 1) |
| return loopOp.getResult(0); |
| |
| return mlir::Value{}; |
| } |
| |
| template <typename Op, typename Clause> |
| static void genDataOperandOperationsWithModifier( |
| const Clause *x, Fortran::lower::AbstractConverter &converter, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::StatementContext &stmtCtx, |
| Fortran::parser::AccDataModifier::Modifier mod, |
| llvm::SmallVectorImpl<mlir::Value> &dataClauseOperands, |
| const mlir::acc::DataClause clause, |
| const mlir::acc::DataClause clauseWithModifier, |
| llvm::ArrayRef<mlir::Value> async, |
| llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes, |
| llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes, |
| bool setDeclareAttr = false, AccDataMap *dataMap = nullptr) { |
| const Fortran::parser::AccObjectListWithModifier &listWithModifier = x->v; |
| const auto &accObjectList = |
| std::get<Fortran::parser::AccObjectList>(listWithModifier.t); |
| const auto &modifier = |
| std::get<std::optional<Fortran::parser::AccDataModifier>>( |
| listWithModifier.t); |
| mlir::acc::DataClause dataClause = |
| (modifier && (*modifier).v == mod) ? clauseWithModifier : clause; |
| genDataOperandOperations<Op>(accObjectList, converter, semanticsContext, |
| stmtCtx, dataClauseOperands, dataClause, |
| /*structured=*/true, /*implicit=*/false, async, |
| asyncDeviceTypes, asyncOnlyDeviceTypes, |
| setDeclareAttr, dataMap); |
| } |
| |
| template <typename Op> |
| static Op createComputeOp( |
| Fortran::lower::AbstractConverter &converter, |
| mlir::Location currentLocation, Fortran::lower::pft::Evaluation &eval, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::StatementContext &stmtCtx, |
| const Fortran::parser::AccClauseList &accClauseList, |
| std::optional<mlir::acc::CombinedConstructsType> combinedConstructs = |
| std::nullopt) { |
| |
| // Parallel operation operands |
| mlir::Value ifCond; |
| mlir::Value selfCond; |
| llvm::SmallVector<mlir::Value> waitOperands, attachEntryOperands, |
| copyEntryOperands, copyinEntryOperands, copyoutEntryOperands, |
| createEntryOperands, nocreateEntryOperands, presentEntryOperands, |
| dataClauseOperands, numGangs, numWorkers, vectorLength, async; |
| llvm::SmallVector<mlir::Attribute> numGangsDeviceTypes, numWorkersDeviceTypes, |
| vectorLengthDeviceTypes, asyncDeviceTypes, asyncOnlyDeviceTypes, |
| waitOperandsDeviceTypes, waitOnlyDeviceTypes; |
| llvm::SmallVector<int32_t> numGangsSegments, waitOperandsSegments; |
| llvm::SmallVector<bool> hasWaitDevnums; |
| |
| llvm::SmallVector<mlir::Value> reductionOperands, privateOperands, |
| firstprivateOperands; |
| |
| AccDataMap dataMap; |
| // Self clause has optional values but can be present with |
| // no value as well. When there is no value, the op has an attribute to |
| // represent the clause. |
| bool addSelfAttr = false; |
| |
| bool hasDefaultNone = false; |
| bool hasDefaultPresent = false; |
| |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| |
| // device_type attribute is set to `none` until a device_type clause is |
| // encountered. |
| llvm::SmallVector<mlir::Attribute> crtDeviceTypes; |
| auto crtDeviceTypeAttr = mlir::acc::DeviceTypeAttr::get( |
| builder.getContext(), mlir::acc::DeviceType::None); |
| crtDeviceTypes.push_back(crtDeviceTypeAttr); |
| |
| // Lower clauses values mapped to operands and array attributes. |
| // Keep track of each group of operands separately as clauses can appear |
| // more than once. |
| |
| // Process the clauses that may have a specified device_type first. |
| for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
| if (const auto *asyncClause = |
| std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) { |
| genAsyncClause(converter, asyncClause, async, asyncDeviceTypes, |
| asyncOnlyDeviceTypes, crtDeviceTypes, stmtCtx); |
| } else if (const auto *waitClause = |
| std::get_if<Fortran::parser::AccClause::Wait>(&clause.u)) { |
| genWaitClauseWithDeviceType(converter, waitClause, waitOperands, |
| waitOperandsDeviceTypes, waitOnlyDeviceTypes, |
| hasWaitDevnums, waitOperandsSegments, |
| crtDeviceTypes, stmtCtx); |
| } else if (const auto *numGangsClause = |
| std::get_if<Fortran::parser::AccClause::NumGangs>( |
| &clause.u)) { |
| llvm::SmallVector<mlir::Value> numGangValues; |
| for (const Fortran::parser::ScalarIntExpr &expr : numGangsClause->v) |
| numGangValues.push_back(fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(expr), stmtCtx))); |
| for (auto crtDeviceTypeAttr : crtDeviceTypes) { |
| for (auto value : numGangValues) |
| numGangs.push_back(value); |
| numGangsDeviceTypes.push_back(crtDeviceTypeAttr); |
| numGangsSegments.push_back(numGangValues.size()); |
| } |
| } else if (const auto *numWorkersClause = |
| std::get_if<Fortran::parser::AccClause::NumWorkers>( |
| &clause.u)) { |
| mlir::Value numWorkerValue = fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(numWorkersClause->v), stmtCtx)); |
| for (auto crtDeviceTypeAttr : crtDeviceTypes) { |
| numWorkers.push_back(numWorkerValue); |
| numWorkersDeviceTypes.push_back(crtDeviceTypeAttr); |
| } |
| } else if (const auto *vectorLengthClause = |
| std::get_if<Fortran::parser::AccClause::VectorLength>( |
| &clause.u)) { |
| mlir::Value vectorLengthValue = fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(vectorLengthClause->v), stmtCtx)); |
| for (auto crtDeviceTypeAttr : crtDeviceTypes) { |
| vectorLength.push_back(vectorLengthValue); |
| vectorLengthDeviceTypes.push_back(crtDeviceTypeAttr); |
| } |
| } else if (const auto *deviceTypeClause = |
| std::get_if<Fortran::parser::AccClause::DeviceType>( |
| &clause.u)) { |
| crtDeviceTypes.clear(); |
| gatherDeviceTypeAttrs(builder, deviceTypeClause, crtDeviceTypes); |
| } |
| } |
| |
| // Process the clauses independent of device_type. |
| for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
| mlir::Location clauseLocation = converter.genLocation(clause.source); |
| if (const auto *ifClause = |
| std::get_if<Fortran::parser::AccClause::If>(&clause.u)) { |
| genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx); |
| } else if (const auto *selfClause = |
| std::get_if<Fortran::parser::AccClause::Self>(&clause.u)) { |
| const std::optional<Fortran::parser::AccSelfClause> &accSelfClause = |
| selfClause->v; |
| if (accSelfClause) { |
| if (const auto *optCondition = |
| std::get_if<std::optional<Fortran::parser::ScalarLogicalExpr>>( |
| &(*accSelfClause).u)) { |
| if (*optCondition) { |
| mlir::Value cond = fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(*optCondition), stmtCtx)); |
| selfCond = builder.createConvert(clauseLocation, |
| builder.getI1Type(), cond); |
| } |
| } else if (const auto *accClauseList = |
| std::get_if<Fortran::parser::AccObjectList>( |
| &(*accSelfClause).u)) { |
| // TODO This would be nicer to be done in canonicalization step. |
| if (accClauseList->v.size() == 1) { |
| const auto &accObject = accClauseList->v.front(); |
| if (const auto *designator = |
| std::get_if<Fortran::parser::Designator>(&accObject.u)) { |
| if (const auto *name = |
| Fortran::parser::GetDesignatorNameIfDataRef( |
| *designator)) { |
| auto cond = converter.getSymbolAddress(*name->symbol); |
| selfCond = builder.createConvert(clauseLocation, |
| builder.getI1Type(), cond); |
| } |
| } |
| } |
| } |
| } else { |
| addSelfAttr = true; |
| } |
| } else if (const auto *copyClause = |
| std::get_if<Fortran::parser::AccClause::Copy>(&clause.u)) { |
| auto crtDataStart = dataClauseOperands.size(); |
| genDataOperandOperations<mlir::acc::CopyinOp>( |
| copyClause->v, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, mlir::acc::DataClause::acc_copy, |
| /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes, |
| asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, &dataMap); |
| copyEntryOperands.append(dataClauseOperands.begin() + crtDataStart, |
| dataClauseOperands.end()); |
| } else if (const auto *copyinClause = |
| std::get_if<Fortran::parser::AccClause::Copyin>(&clause.u)) { |
| auto crtDataStart = dataClauseOperands.size(); |
| genDataOperandOperationsWithModifier<mlir::acc::CopyinOp, |
| Fortran::parser::AccClause::Copyin>( |
| copyinClause, converter, semanticsContext, stmtCtx, |
| Fortran::parser::AccDataModifier::Modifier::ReadOnly, |
| dataClauseOperands, mlir::acc::DataClause::acc_copyin, |
| mlir::acc::DataClause::acc_copyin_readonly, async, asyncDeviceTypes, |
| asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, &dataMap); |
| copyinEntryOperands.append(dataClauseOperands.begin() + crtDataStart, |
| dataClauseOperands.end()); |
| } else if (const auto *copyoutClause = |
| std::get_if<Fortran::parser::AccClause::Copyout>( |
| &clause.u)) { |
| auto crtDataStart = dataClauseOperands.size(); |
| genDataOperandOperationsWithModifier<mlir::acc::CreateOp, |
| Fortran::parser::AccClause::Copyout>( |
| copyoutClause, converter, semanticsContext, stmtCtx, |
| Fortran::parser::AccDataModifier::Modifier::Zero, dataClauseOperands, |
| mlir::acc::DataClause::acc_copyout, |
| mlir::acc::DataClause::acc_copyout_zero, async, asyncDeviceTypes, |
| asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, &dataMap); |
| copyoutEntryOperands.append(dataClauseOperands.begin() + crtDataStart, |
| dataClauseOperands.end()); |
| } else if (const auto *createClause = |
| std::get_if<Fortran::parser::AccClause::Create>(&clause.u)) { |
| auto crtDataStart = dataClauseOperands.size(); |
| genDataOperandOperationsWithModifier<mlir::acc::CreateOp, |
| Fortran::parser::AccClause::Create>( |
| createClause, converter, semanticsContext, stmtCtx, |
| Fortran::parser::AccDataModifier::Modifier::Zero, dataClauseOperands, |
| mlir::acc::DataClause::acc_create, |
| mlir::acc::DataClause::acc_create_zero, async, asyncDeviceTypes, |
| asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, &dataMap); |
| createEntryOperands.append(dataClauseOperands.begin() + crtDataStart, |
| dataClauseOperands.end()); |
| } else if (const auto *noCreateClause = |
| std::get_if<Fortran::parser::AccClause::NoCreate>( |
| &clause.u)) { |
| auto crtDataStart = dataClauseOperands.size(); |
| genDataOperandOperations<mlir::acc::NoCreateOp>( |
| noCreateClause->v, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, mlir::acc::DataClause::acc_no_create, |
| /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes, |
| asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, &dataMap); |
| nocreateEntryOperands.append(dataClauseOperands.begin() + crtDataStart, |
| dataClauseOperands.end()); |
| } else if (const auto *presentClause = |
| std::get_if<Fortran::parser::AccClause::Present>( |
| &clause.u)) { |
| auto crtDataStart = dataClauseOperands.size(); |
| genDataOperandOperations<mlir::acc::PresentOp>( |
| presentClause->v, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, mlir::acc::DataClause::acc_present, |
| /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes, |
| asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, &dataMap); |
| presentEntryOperands.append(dataClauseOperands.begin() + crtDataStart, |
| dataClauseOperands.end()); |
| } else if (const auto *devicePtrClause = |
| std::get_if<Fortran::parser::AccClause::Deviceptr>( |
| &clause.u)) { |
| AccDataMap *symPairs = enableDevicePtrRemap ? &dataMap : nullptr; |
| genDataOperandOperations<mlir::acc::DevicePtrOp>( |
| devicePtrClause->v, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, mlir::acc::DataClause::acc_deviceptr, |
| /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes, |
| asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, symPairs); |
| } else if (const auto *attachClause = |
| std::get_if<Fortran::parser::AccClause::Attach>(&clause.u)) { |
| auto crtDataStart = dataClauseOperands.size(); |
| genDataOperandOperations<mlir::acc::AttachOp>( |
| attachClause->v, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, mlir::acc::DataClause::acc_attach, |
| /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes, |
| asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, &dataMap); |
| attachEntryOperands.append(dataClauseOperands.begin() + crtDataStart, |
| dataClauseOperands.end()); |
| } else if (const auto *privateClause = |
| std::get_if<Fortran::parser::AccClause::Private>( |
| &clause.u)) { |
| if (!combinedConstructs) |
| genDataOperandOperations<mlir::acc::PrivateOp>( |
| privateClause->v, converter, semanticsContext, stmtCtx, |
| privateOperands, mlir::acc::DataClause::acc_private, |
| /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes, |
| asyncOnlyDeviceTypes, |
| /*setDeclareAttr=*/false, &dataMap); |
| } else if (const auto *firstprivateClause = |
| std::get_if<Fortran::parser::AccClause::Firstprivate>( |
| &clause.u)) { |
| genDataOperandOperations<mlir::acc::FirstprivateOp>( |
| firstprivateClause->v, converter, semanticsContext, stmtCtx, |
| firstprivateOperands, mlir::acc::DataClause::acc_firstprivate, |
| /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes, |
| asyncOnlyDeviceTypes, |
| /*setDeclareAttr=*/false, &dataMap); |
| } else if (const auto *reductionClause = |
| std::get_if<Fortran::parser::AccClause::Reduction>( |
| &clause.u)) { |
| // A reduction clause on a combined construct is treated as if it appeared |
| // on the loop construct. So don't generate a reduction clause when it is |
| // combined - delay it to the loop. However, a reduction clause on a |
| // combined construct implies a copy clause so issue an implicit copy |
| // instead. |
| if (!combinedConstructs) { |
| genReductions(reductionClause->v, converter, semanticsContext, stmtCtx, |
| reductionOperands, async, asyncDeviceTypes, |
| asyncOnlyDeviceTypes, &dataMap); |
| } else { |
| auto crtDataStart = dataClauseOperands.size(); |
| genDataOperandOperations<mlir::acc::CopyinOp>( |
| std::get<Fortran::parser::AccObjectList>(reductionClause->v.t), |
| converter, semanticsContext, stmtCtx, dataClauseOperands, |
| mlir::acc::DataClause::acc_reduction, |
| /*structured=*/true, /*implicit=*/true, async, asyncDeviceTypes, |
| asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, &dataMap); |
| copyEntryOperands.append(dataClauseOperands.begin() + crtDataStart, |
| dataClauseOperands.end()); |
| } |
| } else if (const auto *defaultClause = |
| std::get_if<Fortran::parser::AccClause::Default>( |
| &clause.u)) { |
| if ((defaultClause->v).v == llvm::acc::DefaultValue::ACC_Default_none) |
| hasDefaultNone = true; |
| else if ((defaultClause->v).v == |
| llvm::acc::DefaultValue::ACC_Default_present) |
| hasDefaultPresent = true; |
| } |
| } |
| |
| // Prepare the operand segment size attribute and the operands value range. |
| llvm::SmallVector<mlir::Value, 8> operands; |
| llvm::SmallVector<int32_t, 8> operandSegments; |
| addOperands(operands, operandSegments, async); |
| addOperands(operands, operandSegments, waitOperands); |
| if constexpr (!std::is_same_v<Op, mlir::acc::SerialOp>) { |
| addOperands(operands, operandSegments, numGangs); |
| addOperands(operands, operandSegments, numWorkers); |
| addOperands(operands, operandSegments, vectorLength); |
| } |
| addOperand(operands, operandSegments, ifCond); |
| addOperand(operands, operandSegments, selfCond); |
| addOperands(operands, operandSegments, reductionOperands); |
| addOperands(operands, operandSegments, privateOperands); |
| addOperands(operands, operandSegments, firstprivateOperands); |
| addOperands(operands, operandSegments, dataClauseOperands); |
| |
| Op computeOp; |
| if constexpr (std::is_same_v<Op, mlir::acc::KernelsOp>) |
| computeOp = createRegionOp<Op, mlir::acc::TerminatorOp>( |
| builder, currentLocation, currentLocation, eval, operands, |
| operandSegments, /*outerCombined=*/combinedConstructs.has_value()); |
| else |
| computeOp = createRegionOp<Op, mlir::acc::YieldOp>( |
| builder, currentLocation, currentLocation, eval, operands, |
| operandSegments, /*outerCombined=*/combinedConstructs.has_value()); |
| |
| if (addSelfAttr) |
| computeOp.setSelfAttrAttr(builder.getUnitAttr()); |
| |
| if (hasDefaultNone) |
| computeOp.setDefaultAttr(mlir::acc::ClauseDefaultValue::None); |
| if (hasDefaultPresent) |
| computeOp.setDefaultAttr(mlir::acc::ClauseDefaultValue::Present); |
| |
| if constexpr (!std::is_same_v<Op, mlir::acc::SerialOp>) { |
| if (!numWorkersDeviceTypes.empty()) |
| computeOp.setNumWorkersDeviceTypeAttr( |
| mlir::ArrayAttr::get(builder.getContext(), numWorkersDeviceTypes)); |
| if (!vectorLengthDeviceTypes.empty()) |
| computeOp.setVectorLengthDeviceTypeAttr( |
| mlir::ArrayAttr::get(builder.getContext(), vectorLengthDeviceTypes)); |
| if (!numGangsDeviceTypes.empty()) |
| computeOp.setNumGangsDeviceTypeAttr( |
| mlir::ArrayAttr::get(builder.getContext(), numGangsDeviceTypes)); |
| if (!numGangsSegments.empty()) |
| computeOp.setNumGangsSegmentsAttr( |
| builder.getDenseI32ArrayAttr(numGangsSegments)); |
| } |
| if (!asyncDeviceTypes.empty()) |
| computeOp.setAsyncOperandsDeviceTypeAttr( |
| builder.getArrayAttr(asyncDeviceTypes)); |
| if (!asyncOnlyDeviceTypes.empty()) |
| computeOp.setAsyncOnlyAttr(builder.getArrayAttr(asyncOnlyDeviceTypes)); |
| |
| if (!waitOperandsDeviceTypes.empty()) |
| computeOp.setWaitOperandsDeviceTypeAttr( |
| builder.getArrayAttr(waitOperandsDeviceTypes)); |
| if (!waitOperandsSegments.empty()) |
| computeOp.setWaitOperandsSegmentsAttr( |
| builder.getDenseI32ArrayAttr(waitOperandsSegments)); |
| if (!hasWaitDevnums.empty()) |
| computeOp.setHasWaitDevnumAttr(builder.getBoolArrayAttr(hasWaitDevnums)); |
| if (!waitOnlyDeviceTypes.empty()) |
| computeOp.setWaitOnlyAttr(builder.getArrayAttr(waitOnlyDeviceTypes)); |
| |
| if (combinedConstructs) |
| computeOp.setCombinedAttr(builder.getUnitAttr()); |
| |
| auto insPt = builder.saveInsertionPoint(); |
| |
| // Remap symbols from data clauses to use data operation results |
| dataMap.remapDataOperandSymbols(converter, builder, computeOp.getRegion()); |
| |
| builder.setInsertionPointAfter(computeOp); |
| |
| // Create the exit operations after the region. |
| genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>( |
| builder, copyEntryOperands, /*structured=*/true); |
| genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::DeleteOp>( |
| builder, copyinEntryOperands, /*structured=*/true); |
| genDataExitOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>( |
| builder, copyoutEntryOperands, /*structured=*/true); |
| genDataExitOperations<mlir::acc::AttachOp, mlir::acc::DetachOp>( |
| builder, attachEntryOperands, /*structured=*/true); |
| genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>( |
| builder, createEntryOperands, /*structured=*/true); |
| genDataExitOperations<mlir::acc::NoCreateOp, mlir::acc::DeleteOp>( |
| builder, nocreateEntryOperands, /*structured=*/true); |
| genDataExitOperations<mlir::acc::PresentOp, mlir::acc::DeleteOp>( |
| builder, presentEntryOperands, /*structured=*/true); |
| |
| builder.restoreInsertionPoint(insPt); |
| return computeOp; |
| } |
| |
| /// Lower an OpenACC `data` construct to an `acc.data` operation. |
| /// |
| /// The clause list is walked twice: the first walk handles the clauses that |
| /// can be qualified by a preceding device_type clause (async and wait), the |
| /// second walk handles the remaining ones (if, the data clauses and default). |
| /// Data entry operations are remembered per clause kind so that the matching |
| /// exit operations can be emitted right after the `acc.data` operation, at |
| /// \p endLocation. No `acc.data` operation is created when the construct has |
| /// neither data operands nor a default clause. |
| static void genACCDataOp(Fortran::lower::AbstractConverter &converter, |
|                          mlir::Location currentLocation, |
|                          mlir::Location endLocation, |
|                          Fortran::lower::pft::Evaluation &eval, |
|                          Fortran::semantics::SemanticsContext &semanticsContext, |
|                          Fortran::lower::StatementContext &stmtCtx, |
|                          const Fortran::parser::AccClauseList &accClauseList) { |
|   mlir::Value ifCond; |
| |
|   // Entry operations, grouped by the exit operation they are paired with. |
|   llvm::SmallVector<mlir::Value> attachEntryOperands; |
|   llvm::SmallVector<mlir::Value> createEntryOperands; |
|   llvm::SmallVector<mlir::Value> copyEntryOperands; |
|   llvm::SmallVector<mlir::Value> copyinEntryOperands; |
|   llvm::SmallVector<mlir::Value> copyoutEntryOperands; |
|   llvm::SmallVector<mlir::Value> nocreateEntryOperands; |
|   llvm::SmallVector<mlir::Value> presentEntryOperands; |
| |
|   // Operand groups of the acc.data operation. |
|   llvm::SmallVector<mlir::Value> dataClauseOperands; |
|   llvm::SmallVector<mlir::Value> waitOperands; |
|   llvm::SmallVector<mlir::Value> asyncOperands; |
| |
|   // Attributes and segments filled in by the async and wait clause helpers. |
|   llvm::SmallVector<mlir::Attribute> asyncDeviceTypes; |
|   llvm::SmallVector<mlir::Attribute> asyncOnlyDeviceTypes; |
|   llvm::SmallVector<mlir::Attribute> waitOperandsDeviceTypes; |
|   llvm::SmallVector<mlir::Attribute> waitOnlyDeviceTypes; |
|   llvm::SmallVector<int32_t> waitOperandsSegments; |
|   llvm::SmallVector<bool> hasWaitDevnums; |
| |
|   bool hasDefaultNone = false; |
|   bool hasDefaultPresent = false; |
| |
|   fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| |
|   // Clauses apply to the `none` device type until a device_type clause is |
|   // encountered. |
|   llvm::SmallVector<mlir::Attribute> activeDeviceTypes; |
|   activeDeviceTypes.push_back(mlir::acc::DeviceTypeAttr::get( |
|       builder.getContext(), mlir::acc::DeviceType::None)); |
| |
|   // Clause values are lowered to operands and array attributes. Each group of |
|   // operands is tracked separately because a clause can appear more than |
|   // once. |
| |
|   // First walk: clauses that may be qualified by a device_type clause. |
|   for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
|     if (const auto *asyncClause = |
|             std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) { |
|       genAsyncClause(converter, asyncClause, asyncOperands, asyncDeviceTypes, |
|                      asyncOnlyDeviceTypes, activeDeviceTypes, stmtCtx); |
|       continue; |
|     } |
|     if (const auto *waitClause = |
|             std::get_if<Fortran::parser::AccClause::Wait>(&clause.u)) { |
|       genWaitClauseWithDeviceType(converter, waitClause, waitOperands, |
|                                   waitOperandsDeviceTypes, waitOnlyDeviceTypes, |
|                                   hasWaitDevnums, waitOperandsSegments, |
|                                   activeDeviceTypes, stmtCtx); |
|       continue; |
|     } |
|     if (const auto *deviceTypeClause = |
|             std::get_if<Fortran::parser::AccClause::DeviceType>(&clause.u)) { |
|       // Subsequent async/wait clauses apply to the device types listed here. |
|       activeDeviceTypes.clear(); |
|       gatherDeviceTypeAttrs(builder, deviceTypeClause, activeDeviceTypes); |
|     } |
|   } |
| |
|   // Append to `entries` the data operands created from index `firstNew` on. |
|   auto recordEntries = [&dataClauseOperands]( |
|                            llvm::SmallVectorImpl<mlir::Value> &entries, |
|                            size_t firstNew) { |
|     entries.append(dataClauseOperands.begin() + firstNew, |
|                    dataClauseOperands.end()); |
|   }; |
| |
|   // Second walk: clauses that do not depend on device_type. |
|   for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
|     mlir::Location clauseLocation = converter.genLocation(clause.source); |
| |
|     if (const auto *ifClause = |
|             std::get_if<Fortran::parser::AccClause::If>(&clause.u)) { |
|       genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx); |
|       continue; |
|     } |
| |
|     if (const auto *copy = |
|             std::get_if<Fortran::parser::AccClause::Copy>(&clause.u)) { |
|       const auto firstNew = dataClauseOperands.size(); |
|       genDataOperandOperations<mlir::acc::CopyinOp>( |
|           copy->v, converter, semanticsContext, stmtCtx, dataClauseOperands, |
|           mlir::acc::DataClause::acc_copy, |
|           /*structured=*/true, /*implicit=*/false, asyncOperands, |
|           asyncDeviceTypes, asyncOnlyDeviceTypes); |
|       recordEntries(copyEntryOperands, firstNew); |
|       continue; |
|     } |
| |
|     if (const auto *copyin = |
|             std::get_if<Fortran::parser::AccClause::Copyin>(&clause.u)) { |
|       const auto firstNew = dataClauseOperands.size(); |
|       genDataOperandOperationsWithModifier<mlir::acc::CopyinOp, |
|                                            Fortran::parser::AccClause::Copyin>( |
|           copyin, converter, semanticsContext, stmtCtx, |
|           Fortran::parser::AccDataModifier::Modifier::ReadOnly, |
|           dataClauseOperands, mlir::acc::DataClause::acc_copyin, |
|           mlir::acc::DataClause::acc_copyin_readonly, asyncOperands, |
|           asyncDeviceTypes, asyncOnlyDeviceTypes); |
|       recordEntries(copyinEntryOperands, firstNew); |
|       continue; |
|     } |
| |
|     if (const auto *copyout = |
|             std::get_if<Fortran::parser::AccClause::Copyout>(&clause.u)) { |
|       const auto firstNew = dataClauseOperands.size(); |
|       genDataOperandOperationsWithModifier<mlir::acc::CreateOp, |
|                                            Fortran::parser::AccClause::Copyout>( |
|           copyout, converter, semanticsContext, stmtCtx, |
|           Fortran::parser::AccDataModifier::Modifier::Zero, dataClauseOperands, |
|           mlir::acc::DataClause::acc_copyout, |
|           mlir::acc::DataClause::acc_copyout_zero, asyncOperands, |
|           asyncDeviceTypes, asyncOnlyDeviceTypes); |
|       recordEntries(copyoutEntryOperands, firstNew); |
|       continue; |
|     } |
| |
|     if (const auto *create = |
|             std::get_if<Fortran::parser::AccClause::Create>(&clause.u)) { |
|       const auto firstNew = dataClauseOperands.size(); |
|       genDataOperandOperationsWithModifier<mlir::acc::CreateOp, |
|                                            Fortran::parser::AccClause::Create>( |
|           create, converter, semanticsContext, stmtCtx, |
|           Fortran::parser::AccDataModifier::Modifier::Zero, dataClauseOperands, |
|           mlir::acc::DataClause::acc_create, |
|           mlir::acc::DataClause::acc_create_zero, asyncOperands, |
|           asyncDeviceTypes, asyncOnlyDeviceTypes); |
|       recordEntries(createEntryOperands, firstNew); |
|       continue; |
|     } |
| |
|     if (const auto *noCreate = |
|             std::get_if<Fortran::parser::AccClause::NoCreate>(&clause.u)) { |
|       const auto firstNew = dataClauseOperands.size(); |
|       genDataOperandOperations<mlir::acc::NoCreateOp>( |
|           noCreate->v, converter, semanticsContext, stmtCtx, |
|           dataClauseOperands, mlir::acc::DataClause::acc_no_create, |
|           /*structured=*/true, /*implicit=*/false, asyncOperands, |
|           asyncDeviceTypes, asyncOnlyDeviceTypes); |
|       recordEntries(nocreateEntryOperands, firstNew); |
|       continue; |
|     } |
| |
|     if (const auto *present = |
|             std::get_if<Fortran::parser::AccClause::Present>(&clause.u)) { |
|       const auto firstNew = dataClauseOperands.size(); |
|       genDataOperandOperations<mlir::acc::PresentOp>( |
|           present->v, converter, semanticsContext, stmtCtx, dataClauseOperands, |
|           mlir::acc::DataClause::acc_present, |
|           /*structured=*/true, /*implicit=*/false, asyncOperands, |
|           asyncDeviceTypes, asyncOnlyDeviceTypes); |
|       recordEntries(presentEntryOperands, firstNew); |
|       continue; |
|     } |
| |
|     if (const auto *deviceptr = |
|             std::get_if<Fortran::parser::AccClause::Deviceptr>(&clause.u)) { |
|       // deviceptr operands are not recorded: no exit operation is generated |
|       // for them below. |
|       genDataOperandOperations<mlir::acc::DevicePtrOp>( |
|           deviceptr->v, converter, semanticsContext, stmtCtx, |
|           dataClauseOperands, mlir::acc::DataClause::acc_deviceptr, |
|           /*structured=*/true, /*implicit=*/false, asyncOperands, |
|           asyncDeviceTypes, asyncOnlyDeviceTypes); |
|       continue; |
|     } |
| |
|     if (const auto *attach = |
|             std::get_if<Fortran::parser::AccClause::Attach>(&clause.u)) { |
|       const auto firstNew = dataClauseOperands.size(); |
|       genDataOperandOperations<mlir::acc::AttachOp>( |
|           attach->v, converter, semanticsContext, stmtCtx, dataClauseOperands, |
|           mlir::acc::DataClause::acc_attach, |
|           /*structured=*/true, /*implicit=*/false, asyncOperands, |
|           asyncDeviceTypes, asyncOnlyDeviceTypes); |
|       recordEntries(attachEntryOperands, firstNew); |
|       continue; |
|     } |
| |
|     if (const auto *defaultClause = |
|             std::get_if<Fortran::parser::AccClause::Default>(&clause.u)) { |
|       const auto defaultKind = (defaultClause->v).v; |
|       if (defaultKind == llvm::acc::DefaultValue::ACC_Default_none) |
|         hasDefaultNone = true; |
|       else if (defaultKind == llvm::acc::DefaultValue::ACC_Default_present) |
|         hasDefaultPresent = true; |
|     } |
|   } |
| |
|   // Build the flat operand list together with its segment sizes. |
|   llvm::SmallVector<mlir::Value> operands; |
|   llvm::SmallVector<int32_t> operandSegments; |
|   addOperand(operands, operandSegments, ifCond); |
|   addOperands(operands, operandSegments, asyncOperands); |
|   addOperands(operands, operandSegments, waitOperands); |
|   addOperands(operands, operandSegments, dataClauseOperands); |
| |
|   // Without data operands and without a default clause, no acc.data operation |
|   // is created. |
|   const bool hasDefaultClause = hasDefaultNone || hasDefaultPresent; |
|   if (dataClauseOperands.empty() && !hasDefaultClause) |
|     return; |
| |
|   auto dataOp = createRegionOp<mlir::acc::DataOp, mlir::acc::TerminatorOp>( |
|       builder, currentLocation, currentLocation, eval, operands, |
|       operandSegments); |
| |
|   // Attach the async related attributes. |
|   if (!asyncDeviceTypes.empty()) { |
|     dataOp.setAsyncOperandsDeviceTypeAttr( |
|         builder.getArrayAttr(asyncDeviceTypes)); |
|   } |
|   if (!asyncOnlyDeviceTypes.empty()) { |
|     dataOp.setAsyncOnlyAttr(builder.getArrayAttr(asyncOnlyDeviceTypes)); |
|   } |
| |
|   // Attach the wait related attributes. |
|   if (!waitOperandsDeviceTypes.empty()) { |
|     dataOp.setWaitOperandsDeviceTypeAttr( |
|         builder.getArrayAttr(waitOperandsDeviceTypes)); |
|   } |
|   if (!waitOperandsSegments.empty()) { |
|     dataOp.setWaitOperandsSegmentsAttr( |
|         builder.getDenseI32ArrayAttr(waitOperandsSegments)); |
|   } |
|   if (!hasWaitDevnums.empty()) { |
|     dataOp.setHasWaitDevnumAttr(builder.getBoolArrayAttr(hasWaitDevnums)); |
|   } |
|   if (!waitOnlyDeviceTypes.empty()) { |
|     dataOp.setWaitOnlyAttr(builder.getArrayAttr(waitOnlyDeviceTypes)); |
|   } |
| |
|   // Attach the default clause attribute. `present` is applied last, so it |
|   // takes precedence if both flags were set. |
|   if (hasDefaultNone) { |
|     dataOp.setDefaultAttr(mlir::acc::ClauseDefaultValue::None); |
|   } |
|   if (hasDefaultPresent) { |
|     dataOp.setDefaultAttr(mlir::acc::ClauseDefaultValue::Present); |
|   } |
| |
|   // Emit the exit operations right after the acc.data operation, then put the |
|   // builder back where it was. |
|   auto savedInsertionPoint = builder.saveInsertionPoint(); |
|   builder.setInsertionPointAfter(dataOp); |
| |
|   genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>( |
|       builder, copyEntryOperands, /*structured=*/true, endLocation); |
|   genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::DeleteOp>( |
|       builder, copyinEntryOperands, /*structured=*/true, endLocation); |
|   genDataExitOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>( |
|       builder, copyoutEntryOperands, /*structured=*/true, endLocation); |
|   genDataExitOperations<mlir::acc::AttachOp, mlir::acc::DetachOp>( |
|       builder, attachEntryOperands, /*structured=*/true, endLocation); |
|   genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>( |
|       builder, createEntryOperands, /*structured=*/true, endLocation); |
|   genDataExitOperations<mlir::acc::NoCreateOp, mlir::acc::DeleteOp>( |
|       builder, nocreateEntryOperands, /*structured=*/true, endLocation); |
|   genDataExitOperations<mlir::acc::PresentOp, mlir::acc::DeleteOp>( |
|       builder, presentEntryOperands, /*structured=*/true, endLocation); |
| |
|   builder.restoreInsertionPoint(savedInsertionPoint); |
| } |
| |
| /// Lower an OpenACC `host_data` construct to an `acc.host_data` operation. |
| /// |
| /// The `if` clause is lowered through genIfClause, each `use_device` clause |
| /// produces `acc.use_device` data operands, and `if_present` is recorded as a |
| /// unit attribute on the operation. An `if` condition defined by a constant |
| /// boolean is folded: a true condition is dropped from the operands, and a |
| /// false condition suppresses the creation of the operation altogether. |
| /// |
| /// \param localSymbols symbol map used, when CUDA Fortran is enabled, to copy |
| ///        the binding of the same-named symbol onto the symbol referenced by |
| ///        the `use_device` clause. |
| static void |
| genACCHostDataOp(Fortran::lower::AbstractConverter &converter, |
|                  mlir::Location currentLocation, |
|                  Fortran::lower::pft::Evaluation &eval, |
|                  Fortran::semantics::SemanticsContext &semanticsContext, |
|                  Fortran::lower::StatementContext &stmtCtx, |
|                  const Fortran::parser::AccClauseList &accClauseList, |
|                  Fortran::lower::SymMap &localSymbols) { |
|   mlir::Value ifCond; |
|   llvm::SmallVector<mlir::Value> dataOperands; |
|   bool addIfPresentAttr = false; |
| |
|   fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| |
|   AccDataMap dataMap; |
|   for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
|     mlir::Location clauseLocation = converter.genLocation(clause.source); |
|     if (const auto *ifClause = |
|             std::get_if<Fortran::parser::AccClause::If>(&clause.u)) { |
|       genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx); |
|     } else if (const auto *useDevice = |
|                    std::get_if<Fortran::parser::AccClause::UseDevice>( |
|                        &clause.u)) { |
|       // When CUDA Fortran is enabled, extra symbols are used in the host_data |
|       // region. Look for them and bind their values with the symbols in the |
|       // outer scope. |
|       if (semanticsContext.IsEnabled(Fortran::common::LanguageFeature::CUDA)) { |
|         const Fortran::parser::AccObjectList &objectList{useDevice->v}; |
|         for (const auto &accObject : objectList.v) { |
|           Fortran::semantics::Symbol &symbol = |
|               getSymbolFromAccObject(accObject); |
|           const Fortran::semantics::Symbol *baseSym = |
|               localSymbols.lookupSymbolByName(symbol.name().ToString()); |
|           // The lookup hands back a pointer; only copy the binding when a |
|           // symbol was actually found instead of dereferencing a null result. |
|           if (baseSym) |
|             localSymbols.copySymbolBinding(*baseSym, symbol); |
|         } |
|       } |
|       genDataOperandOperations<mlir::acc::UseDeviceOp>( |
|           useDevice->v, converter, semanticsContext, stmtCtx, dataOperands, |
|           mlir::acc::DataClause::acc_use_device, |
|           /*structured=*/true, /*implicit=*/false, /*async=*/{}, |
|           /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{}, |
|           /*setDeclareAttr=*/false, &dataMap); |
|     } else if (std::get_if<Fortran::parser::AccClause::IfPresent>(&clause.u)) { |
|       addIfPresentAttr = true; |
|     } |
|   } |
| |
|   if (ifCond) { |
|     // Use the typed getDefiningOp accessor: unlike dyn_cast on the raw |
|     // defining operation, it tolerates a value without a defining operation |
|     // and simply yields a null op in that case. |
|     if (auto cst = ifCond.getDefiningOp<mlir::arith::ConstantOp>()) { |
|       if (auto boolAttr = mlir::dyn_cast<mlir::BoolAttr>(cst.getValue())) { |
|         if (boolAttr.getValue()) { |
|           // Get rid of the if condition if it is always true. |
|           ifCond = mlir::Value(); |
|         } else { |
|           // Do not generate the acc.host_data op if the if condition is always |
|           // false. |
|           return; |
|         } |
|       } |
|     } |
|   } |
| |
|   // Prepare the operand segment size attribute and the operands value range. |
|   llvm::SmallVector<mlir::Value> operands; |
|   llvm::SmallVector<int32_t> operandSegments; |
|   addOperand(operands, operandSegments, ifCond); |
|   addOperands(operands, operandSegments, dataOperands); |
| |
|   auto hostDataOp = |
|       createRegionOp<mlir::acc::HostDataOp, mlir::acc::TerminatorOp>( |
|           builder, currentLocation, currentLocation, eval, operands, |
|           operandSegments); |
| |
|   if (addIfPresentAttr) |
|     hostDataOp.setIfPresentAttr(builder.getUnitAttr()); |
| |
|   // Remap symbols from use_device clauses to use the data operation results. |
|   dataMap.remapDataOperandSymbols(converter, builder, hostDataOp.getRegion()); |
| } |
| |
| /// Lower an OpenACC block construct. |
| /// |
| /// Dispatches on the directive of the begin-block statement: `parallel`, |
| /// `serial` and `kernels` go through createComputeOp, `data` through |
| /// genACCDataOp and `host_data` through genACCHostDataOp. Any other directive |
| /// is left untouched. |
| static void genACC(Fortran::lower::AbstractConverter &converter, |
|                    Fortran::semantics::SemanticsContext &semanticsContext, |
|                    Fortran::lower::pft::Evaluation &eval, |
|                    const Fortran::parser::OpenACCBlockConstruct &blockConstruct, |
|                    Fortran::lower::SymMap &localSymbols) { |
|   // Pull the directive and its clause list out of the begin statement. |
|   const auto &beginStmt = |
|       std::get<Fortran::parser::AccBeginBlockDirective>(blockConstruct.t); |
|   const auto &directive = |
|       std::get<Fortran::parser::AccBlockDirective>(beginStmt.t); |
|   const auto &clauses = std::get<Fortran::parser::AccClauseList>(beginStmt.t); |
|   const auto &endStmt = |
|       std::get<Fortran::parser::AccEndBlockDirective>(blockConstruct.t); |
| |
|   mlir::Location endLoc = converter.genLocation(endStmt.source); |
|   mlir::Location beginLoc = converter.genLocation(directive.source); |
|   Fortran::lower::StatementContext stmtCtx; |
| |
|   switch (directive.v) { |
|   case llvm::acc::ACCD_parallel: |
|     createComputeOp<mlir::acc::ParallelOp>(converter, beginLoc, eval, |
|                                            semanticsContext, stmtCtx, clauses); |
|     break; |
|   case llvm::acc::ACCD_data: |
|     // The data construct is the only one that uses the end location. |
|     genACCDataOp(converter, beginLoc, endLoc, eval, semanticsContext, stmtCtx, |
|                  clauses); |
|     break; |
|   case llvm::acc::ACCD_serial: |
|     createComputeOp<mlir::acc::SerialOp>(converter, beginLoc, eval, |
|                                          semanticsContext, stmtCtx, clauses); |
|     break; |
|   case llvm::acc::ACCD_kernels: |
|     createComputeOp<mlir::acc::KernelsOp>(converter, beginLoc, eval, |
|                                           semanticsContext, stmtCtx, clauses); |
|     break; |
|   case llvm::acc::ACCD_host_data: |
|     genACCHostDataOp(converter, beginLoc, eval, semanticsContext, stmtCtx, |
|                      clauses, localSymbols); |
|     break; |
|   default: |
|     // No lowering is performed here for any other directive. |
|     break; |
|   } |
| } |
| |
| /// Lower an OpenACC combined construct (`kernels loop`, `parallel loop` or |
| /// `serial loop`). |
| /// |
| /// The compute operation is created first, tagged with the kind of combined |
| /// construct, and the loop operation is then created from the same clause |
| /// list and the associated DO construct. Any other directive is reported as a |
| /// fatal error. |
| static void |
| genACC(Fortran::lower::AbstractConverter &converter, |
|        Fortran::semantics::SemanticsContext &semanticsContext, |
|        Fortran::lower::pft::Evaluation &eval, |
|        const Fortran::parser::OpenACCCombinedConstruct &combinedConstruct) { |
|   const auto &beginStmt = |
|       std::get<Fortran::parser::AccBeginCombinedDirective>(combinedConstruct.t); |
|   const auto &directive = |
|       std::get<Fortran::parser::AccCombinedDirective>(beginStmt.t); |
|   const auto &clauses = std::get<Fortran::parser::AccClauseList>(beginStmt.t); |
|   const auto &doConstruct = |
|       std::get<std::optional<Fortran::parser::DoConstruct>>( |
|           combinedConstruct.t); |
| |
|   mlir::Location loc = converter.genLocation(beginStmt.source); |
|   Fortran::lower::StatementContext stmtCtx; |
| |
|   switch (directive.v) { |
|   case llvm::acc::ACCD_kernels_loop: { |
|     const auto kind = mlir::acc::CombinedConstructsType::KernelsLoop; |
|     createComputeOp<mlir::acc::KernelsOp>(converter, loc, eval, |
|                                           semanticsContext, stmtCtx, clauses, |
|                                           kind); |
|     createLoopOp(converter, loc, semanticsContext, stmtCtx, *doConstruct, eval, |
|                  clauses, kind); |
|     break; |
|   } |
|   case llvm::acc::ACCD_parallel_loop: { |
|     const auto kind = mlir::acc::CombinedConstructsType::ParallelLoop; |
|     createComputeOp<mlir::acc::ParallelOp>(converter, loc, eval, |
|                                            semanticsContext, stmtCtx, clauses, |
|                                            kind); |
|     createLoopOp(converter, loc, semanticsContext, stmtCtx, *doConstruct, eval, |
|                  clauses, kind); |
|     break; |
|   } |
|   case llvm::acc::ACCD_serial_loop: { |
|     const auto kind = mlir::acc::CombinedConstructsType::SerialLoop; |
|     createComputeOp<mlir::acc::SerialOp>(converter, loc, eval, |
|                                          semanticsContext, stmtCtx, clauses, |
|                                          kind); |
|     createLoopOp(converter, loc, semanticsContext, stmtCtx, *doConstruct, eval, |
|                  clauses, kind); |
|     break; |
|   } |
|   default: |
|     llvm::report_fatal_error("Unknown combined construct encountered"); |
|   } |
| } |
| |
| static void |
| genACCEnterDataOp(Fortran::lower::AbstractConverter &converter, |
| mlir::Location currentLocation, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::StatementContext &stmtCtx, |
| const Fortran::parser::AccClauseList &accClauseList) { |
| mlir::Value ifCond, async, waitDevnum; |
| llvm::SmallVector<mlir::Value> waitOperands, dataClauseOperands; |
| |
| // Async, wait and self clause have optional values but can be present with |
| // no value as well. When there is no value, the op has an attribute to |
| // represent the clause. |
| bool addAsyncAttr = false; |
| bool addWaitAttr = false; |
| |
| fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
| |
| // Lower clauses values mapped to operands. |
| // Keep track of each group of operands separately as clauses can appear |
| // more than once. |
| |
| // Process the async clause first. |
| for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
| if (const auto *asyncClause = |
| std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) { |
| genAsyncClause(converter, asyncClause, async, addAsyncAttr, stmtCtx); |
| } |
| } |
| |
| // The async clause of 'enter data' applies to all device types, |
| // so propagate the async clause to copyin/create/attach ops |
| // as if it is an async clause without preceding device_type clause. |
| llvm::SmallVector<mlir::Attribute> asyncDeviceTypes, asyncOnlyDeviceTypes; |
| llvm::SmallVector<mlir::Value> asyncValues; |
| auto noneDeviceTypeAttr = mlir::acc::DeviceTypeAttr::get( |
| firOpBuilder.getContext(), mlir::acc::DeviceType::None); |
| if (addAsyncAttr) { |
| asyncOnlyDeviceTypes.push_back(noneDeviceTypeAttr); |
| } else if (async) { |
| asyncValues.push_back(async); |
| asyncDeviceTypes.push_back(noneDeviceTypeAttr); |
| } |
| |
| for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
| mlir::Location clauseLocation = converter.genLocation(clause.source); |
| if (const auto *ifClause = |
| std::get_if<Fortran::parser::AccClause::If>(&clause.u)) { |
| genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx); |
| } else if (const auto *waitClause = |
| std::get_if<Fortran::parser::AccClause::Wait>(&clause.u)) { |
| genWaitClause(converter, waitClause, waitOperands, waitDevnum, |
| addWaitAttr, stmtCtx); |
| } else if (const auto *copyinClause = |
| std::get_if<Fortran::parser::AccClause::Copyin>(&clause.u)) { |
| const Fortran::parser::AccObjectListWithModifier &listWithModifier = |
| copyinClause->v; |
| const auto &accObjectList = |
| std::get<Fortran::parser::AccObjectList>(listWithModifier.t); |
| genDataOperandOperations<mlir::acc::CopyinOp>( |
| accObjectList, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, mlir::acc::DataClause::acc_copyin, false, |
| /*implicit=*/false, asyncValues, asyncDeviceTypes, |
| asyncOnlyDeviceTypes); |
| } else if (const auto *createClause = |
| std::get_if<Fortran::parser::AccClause::Create>(&clause.u)) { |
| const Fortran::parser::AccObjectListWithModifier &listWithModifier = |
| createClause->v; |
| const auto &accObjectList = |
| std::get<Fortran::parser::AccObjectList>(listWithModifier.t); |
| const auto &modifier = |
| std::get<std::optional<Fortran::parser::AccDataModifier>>( |
| listWithModifier.t); |
| mlir::acc::DataClause clause = mlir::acc::DataClause::acc_create; |
| if (modifier && |
| (*modifier).v == Fortran::parser::AccDataModifier::Modifier::Zero) |
| clause = mlir::acc::DataClause::acc_create_zero; |
| genDataOperandOperations<mlir::acc::CreateOp>( |
| accObjectList, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, clause, false, /*implicit=*/false, asyncValues, |
| asyncDeviceTypes, asyncOnlyDeviceTypes); |
| } else if (const auto *attachClause = |
| std::get_if<Fortran::parser::AccClause::Attach>(&clause.u)) { |
| genDataOperandOperations<mlir::acc::AttachOp>( |
| attachClause->v, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, mlir::acc::DataClause::acc_attach, false, |
| /*implicit=*/false, asyncValues, asyncDeviceTypes, |
| asyncOnlyDeviceTypes); |
| } else if (!std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) { |
| llvm::report_fatal_error( |
| "Unknown clause in ENTER DATA directive lowering"); |
| } |
| } |
| |
| // Prepare the operand segment size attribute and the operands value range. |
| llvm::SmallVector<mlir::Value, 16> operands; |
| llvm::SmallVector<int32_t, 8> operandSegments; |
| addOperand(operands, operandSegments, ifCond); |
| addOperand(operands, operandSegments, async); |
| addOperand(operands, operandSegments, waitDevnum); |
| addOperands(operands, operandSegments, waitOperands); |
| addOperands(operands, operandSegments, dataClauseOperands); |
| |
| mlir::acc::EnterDataOp enterDataOp = createSimpleOp<mlir::acc::EnterDataOp>( |
| firOpBuilder, currentLocation, operands, operandSegments); |
| |
| if (addAsyncAttr) |
| enterDataOp.setAsyncAttr(firOpBuilder.getUnitAttr()); |
| if (addWaitAttr) |
| enterDataOp.setWaitAttr(firOpBuilder.getUnitAttr()); |
| } |
| |
| static void |
| genACCExitDataOp(Fortran::lower::AbstractConverter &converter, |
| mlir::Location currentLocation, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::StatementContext &stmtCtx, |
| const Fortran::parser::AccClauseList &accClauseList) { |
| mlir::Value ifCond, async, waitDevnum; |
| llvm::SmallVector<mlir::Value> waitOperands, dataClauseOperands, |
| copyoutOperands, deleteOperands, detachOperands; |
| |
| // Async and wait clause have optional values but can be present with |
| // no value as well. When there is no value, the op has an attribute to |
| // represent the clause. |
| bool addAsyncAttr = false; |
| bool addWaitAttr = false; |
| bool addFinalizeAttr = false; |
| |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| |
| // Lower clauses values mapped to operands. |
| // Keep track of each group of operands separately as clauses can appear |
| // more than once. |
| |
| // Process the async clause first. |
| for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
| if (const auto *asyncClause = |
| std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) { |
| genAsyncClause(converter, asyncClause, async, addAsyncAttr, stmtCtx); |
| } |
| } |
| |
| // The async clause of 'exit data' applies to all device types, |
| // so propagate the async clause to copyin/create/attach ops |
| // as if it is an async clause without preceding device_type clause. |
| llvm::SmallVector<mlir::Attribute> asyncDeviceTypes, asyncOnlyDeviceTypes; |
| llvm::SmallVector<mlir::Value> asyncValues; |
| auto noneDeviceTypeAttr = mlir::acc::DeviceTypeAttr::get( |
| builder.getContext(), mlir::acc::DeviceType::None); |
| if (addAsyncAttr) { |
| asyncOnlyDeviceTypes.push_back(noneDeviceTypeAttr); |
| } else if (async) { |
| asyncValues.push_back(async); |
| asyncDeviceTypes.push_back(noneDeviceTypeAttr); |
| } |
| |
| for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
| mlir::Location clauseLocation = converter.genLocation(clause.source); |
| if (const auto *ifClause = |
| std::get_if<Fortran::parser::AccClause::If>(&clause.u)) { |
| genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx); |
| } else if (const auto *waitClause = |
| std::get_if<Fortran::parser::AccClause::Wait>(&clause.u)) { |
| genWaitClause(converter, waitClause, waitOperands, waitDevnum, |
| addWaitAttr, stmtCtx); |
| } else if (const auto *copyoutClause = |
| std::get_if<Fortran::parser::AccClause::Copyout>( |
| &clause.u)) { |
| const Fortran::parser::AccObjectListWithModifier &listWithModifier = |
| copyoutClause->v; |
| const auto &accObjectList = |
| std::get<Fortran::parser::AccObjectList>(listWithModifier.t); |
| genDataOperandOperations<mlir::acc::GetDevicePtrOp>( |
| accObjectList, converter, semanticsContext, stmtCtx, copyoutOperands, |
| mlir::acc::DataClause::acc_copyout, false, /*implicit=*/false, |
| asyncValues, asyncDeviceTypes, asyncOnlyDeviceTypes); |
| } else if (const auto *deleteClause = |
| std::get_if<Fortran::parser::AccClause::Delete>(&clause.u)) { |
| genDataOperandOperations<mlir::acc::GetDevicePtrOp>( |
| deleteClause->v, converter, semanticsContext, stmtCtx, deleteOperands, |
| mlir::acc::DataClause::acc_delete, false, /*implicit=*/false, |
| asyncValues, asyncDeviceTypes, asyncOnlyDeviceTypes); |
| } else if (const auto *detachClause = |
| std::get_if<Fortran::parser::AccClause::Detach>(&clause.u)) { |
| genDataOperandOperations<mlir::acc::GetDevicePtrOp>( |
| detachClause->v, converter, semanticsContext, stmtCtx, detachOperands, |
| mlir::acc::DataClause::acc_detach, false, /*implicit=*/false, |
| asyncValues, asyncDeviceTypes, asyncOnlyDeviceTypes); |
| } else if (std::get_if<Fortran::parser::AccClause::Finalize>(&clause.u)) { |
| addFinalizeAttr = true; |
| } |
| } |
| |
| dataClauseOperands.append(copyoutOperands); |
| dataClauseOperands.append(deleteOperands); |
| dataClauseOperands.append(detachOperands); |
| |
| // Prepare the operand segment size attribute and the operands value range. |
| llvm::SmallVector<mlir::Value, 14> operands; |
| llvm::SmallVector<int32_t, 7> operandSegments; |
| addOperand(operands, operandSegments, ifCond); |
| addOperand(operands, operandSegments, async); |
| addOperand(operands, operandSegments, waitDevnum); |
| addOperands(operands, operandSegments, waitOperands); |
| addOperands(operands, operandSegments, dataClauseOperands); |
| |
| mlir::acc::ExitDataOp exitDataOp = createSimpleOp<mlir::acc::ExitDataOp>( |
| builder, currentLocation, operands, operandSegments); |
| |
| if (addAsyncAttr) |
| exitDataOp.setAsyncAttr(builder.getUnitAttr()); |
| if (addWaitAttr) |
| exitDataOp.setWaitAttr(builder.getUnitAttr()); |
| if (addFinalizeAttr) |
| exitDataOp.setFinalizeAttr(builder.getUnitAttr()); |
| |
| genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::CopyoutOp>( |
| builder, copyoutOperands, /*structured=*/false); |
| genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::DeleteOp>( |
| builder, deleteOperands, /*structured=*/false); |
| genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::DetachOp>( |
| builder, detachOperands, /*structured=*/false); |
| } |
| |
| template <typename Op> |
| static void |
| genACCInitShutdownOp(Fortran::lower::AbstractConverter &converter, |
| mlir::Location currentLocation, |
| const Fortran::parser::AccClauseList &accClauseList) { |
| mlir::Value ifCond, deviceNum; |
| |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| Fortran::lower::StatementContext stmtCtx; |
| llvm::SmallVector<mlir::Attribute> deviceTypes; |
| |
| // Lower clauses values mapped to operands. |
| // Keep track of each group of operands separately as clauses can appear |
| // more than once. |
| for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
| mlir::Location clauseLocation = converter.genLocation(clause.source); |
| if (const auto *ifClause = |
| std::get_if<Fortran::parser::AccClause::If>(&clause.u)) { |
| genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx); |
| } else if (const auto *deviceNumClause = |
| std::get_if<Fortran::parser::AccClause::DeviceNum>( |
| &clause.u)) { |
| deviceNum = fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(deviceNumClause->v), stmtCtx)); |
| } else if (const auto *deviceTypeClause = |
| std::get_if<Fortran::parser::AccClause::DeviceType>( |
| &clause.u)) { |
| gatherDeviceTypeAttrs(builder, deviceTypeClause, deviceTypes); |
| } |
| } |
| |
| // Prepare the operand segment size attribute and the operands value range. |
| llvm::SmallVector<mlir::Value, 6> operands; |
| llvm::SmallVector<int32_t, 2> operandSegments; |
| |
| addOperand(operands, operandSegments, deviceNum); |
| addOperand(operands, operandSegments, ifCond); |
| |
| Op op = |
| createSimpleOp<Op>(builder, currentLocation, operands, operandSegments); |
| if (!deviceTypes.empty()) |
| op.setDeviceTypesAttr( |
| mlir::ArrayAttr::get(builder.getContext(), deviceTypes)); |
| } |
| |
| void genACCSetOp(Fortran::lower::AbstractConverter &converter, |
| mlir::Location currentLocation, |
| const Fortran::parser::AccClauseList &accClauseList) { |
| mlir::Value ifCond, deviceNum, defaultAsync; |
| llvm::SmallVector<mlir::Value> deviceTypeOperands; |
| |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| Fortran::lower::StatementContext stmtCtx; |
| llvm::SmallVector<mlir::Attribute> deviceTypes; |
| |
| // Lower clauses values mapped to operands. |
| // Keep track of each group of operands separately as clauses can appear |
| // more than once. |
| for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
| mlir::Location clauseLocation = converter.genLocation(clause.source); |
| if (const auto *ifClause = |
| std::get_if<Fortran::parser::AccClause::If>(&clause.u)) { |
| genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx); |
| } else if (const auto *defaultAsyncClause = |
| std::get_if<Fortran::parser::AccClause::DefaultAsync>( |
| &clause.u)) { |
| defaultAsync = fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(defaultAsyncClause->v), stmtCtx)); |
| } else if (const auto *deviceNumClause = |
| std::get_if<Fortran::parser::AccClause::DeviceNum>( |
| &clause.u)) { |
| deviceNum = fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(deviceNumClause->v), stmtCtx)); |
| } else if (const auto *deviceTypeClause = |
| std::get_if<Fortran::parser::AccClause::DeviceType>( |
| &clause.u)) { |
| gatherDeviceTypeAttrs(builder, deviceTypeClause, deviceTypes); |
| } |
| } |
| |
| // Prepare the operand segment size attribute and the operands value range. |
| llvm::SmallVector<mlir::Value> operands; |
| llvm::SmallVector<int32_t, 3> operandSegments; |
| addOperand(operands, operandSegments, defaultAsync); |
| addOperand(operands, operandSegments, deviceNum); |
| addOperand(operands, operandSegments, ifCond); |
| |
| auto op = createSimpleOp<mlir::acc::SetOp>(builder, currentLocation, operands, |
| operandSegments); |
| if (!deviceTypes.empty()) { |
| assert(deviceTypes.size() == 1 && "expect only one value for acc.set"); |
| op.setDeviceTypeAttr(mlir::cast<mlir::acc::DeviceTypeAttr>(deviceTypes[0])); |
| } |
| } |
| |
| static inline mlir::ArrayAttr |
| getArrayAttr(fir::FirOpBuilder &b, |
| llvm::SmallVector<mlir::Attribute> &attributes) { |
| return attributes.empty() ? nullptr : b.getArrayAttr(attributes); |
| } |
| |
| static inline mlir::ArrayAttr |
| getBoolArrayAttr(fir::FirOpBuilder &b, llvm::SmallVector<bool> &values) { |
| return values.empty() ? nullptr : b.getBoolArrayAttr(values); |
| } |
| |
| static inline mlir::DenseI32ArrayAttr |
| getDenseI32ArrayAttr(fir::FirOpBuilder &builder, |
| llvm::SmallVector<int32_t> &values) { |
| return values.empty() ? nullptr : builder.getDenseI32ArrayAttr(values); |
| } |
| |
| static void |
| genACCUpdateOp(Fortran::lower::AbstractConverter &converter, |
| mlir::Location currentLocation, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::StatementContext &stmtCtx, |
| const Fortran::parser::AccClauseList &accClauseList) { |
| mlir::Value ifCond; |
| llvm::SmallVector<mlir::Value> dataClauseOperands, updateHostOperands, |
| waitOperands, deviceTypeOperands, asyncOperands; |
| llvm::SmallVector<mlir::Attribute> asyncOperandsDeviceTypes, |
| asyncOnlyDeviceTypes, waitOperandsDeviceTypes, waitOnlyDeviceTypes; |
| llvm::SmallVector<bool> hasWaitDevnums; |
| llvm::SmallVector<int32_t> waitOperandsSegments; |
| |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| |
| // device_type attribute is set to `none` until a device_type clause is |
| // encountered. |
| llvm::SmallVector<mlir::Attribute> crtDeviceTypes; |
| crtDeviceTypes.push_back(mlir::acc::DeviceTypeAttr::get( |
| builder.getContext(), mlir::acc::DeviceType::None)); |
| |
| bool ifPresent = false; |
| |
| // Lower clauses values mapped to operands and array attributes. |
| // Keep track of each group of operands separately as clauses can appear |
| // more than once. |
| |
| // Process the clauses that may have a specified device_type first. |
| for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
| if (const auto *asyncClause = |
| std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) { |
| genAsyncClause(converter, asyncClause, asyncOperands, |
| asyncOperandsDeviceTypes, asyncOnlyDeviceTypes, |
| crtDeviceTypes, stmtCtx); |
| } else if (const auto *waitClause = |
| std::get_if<Fortran::parser::AccClause::Wait>(&clause.u)) { |
| genWaitClauseWithDeviceType(converter, waitClause, waitOperands, |
| waitOperandsDeviceTypes, waitOnlyDeviceTypes, |
| hasWaitDevnums, waitOperandsSegments, |
| crtDeviceTypes, stmtCtx); |
| } else if (const auto *deviceTypeClause = |
| std::get_if<Fortran::parser::AccClause::DeviceType>( |
| &clause.u)) { |
| crtDeviceTypes.clear(); |
| gatherDeviceTypeAttrs(builder, deviceTypeClause, crtDeviceTypes); |
| } |
| } |
| |
| // Process the clauses independent of device_type. |
| for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
| mlir::Location clauseLocation = converter.genLocation(clause.source); |
| if (const auto *ifClause = |
| std::get_if<Fortran::parser::AccClause::If>(&clause.u)) { |
| genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx); |
| } else if (const auto *hostClause = |
| std::get_if<Fortran::parser::AccClause::Host>(&clause.u)) { |
| genDataOperandOperations<mlir::acc::GetDevicePtrOp>( |
| hostClause->v, converter, semanticsContext, stmtCtx, |
| updateHostOperands, mlir::acc::DataClause::acc_update_host, false, |
| /*implicit=*/false, asyncOperands, asyncOperandsDeviceTypes, |
| asyncOnlyDeviceTypes); |
| } else if (const auto *deviceClause = |
| std::get_if<Fortran::parser::AccClause::Device>(&clause.u)) { |
| genDataOperandOperations<mlir::acc::UpdateDeviceOp>( |
| deviceClause->v, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, mlir::acc::DataClause::acc_update_device, false, |
| /*implicit=*/false, asyncOperands, asyncOperandsDeviceTypes, |
| asyncOnlyDeviceTypes); |
| } else if (std::get_if<Fortran::parser::AccClause::IfPresent>(&clause.u)) { |
| ifPresent = true; |
| } else if (const auto *selfClause = |
| std::get_if<Fortran::parser::AccClause::Self>(&clause.u)) { |
| const std::optional<Fortran::parser::AccSelfClause> &accSelfClause = |
| selfClause->v; |
| const auto *accObjectList = |
| std::get_if<Fortran::parser::AccObjectList>(&(*accSelfClause).u); |
| assert(accObjectList && "expect AccObjectList"); |
| genDataOperandOperations<mlir::acc::GetDevicePtrOp>( |
| *accObjectList, converter, semanticsContext, stmtCtx, |
| updateHostOperands, mlir::acc::DataClause::acc_update_self, false, |
| /*implicit=*/false, asyncOperands, asyncOperandsDeviceTypes, |
| asyncOnlyDeviceTypes); |
| } |
| } |
| |
| dataClauseOperands.append(updateHostOperands); |
| |
| mlir::acc::UpdateOp::create( |
| builder, currentLocation, ifCond, asyncOperands, |
| getArrayAttr(builder, asyncOperandsDeviceTypes), |
| getArrayAttr(builder, asyncOnlyDeviceTypes), waitOperands, |
| getDenseI32ArrayAttr(builder, waitOperandsSegments), |
| getArrayAttr(builder, waitOperandsDeviceTypes), |
| getBoolArrayAttr(builder, hasWaitDevnums), |
| getArrayAttr(builder, waitOnlyDeviceTypes), dataClauseOperands, |
| ifPresent); |
| |
| genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::UpdateHostOp>( |
| builder, updateHostOperands, /*structured=*/false); |
| } |
| |
| static void |
| genACC(Fortran::lower::AbstractConverter &converter, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| const Fortran::parser::OpenACCStandaloneConstruct &standaloneConstruct) { |
| const auto &standaloneDirective = |
| std::get<Fortran::parser::AccStandaloneDirective>(standaloneConstruct.t); |
| const auto &accClauseList = |
| std::get<Fortran::parser::AccClauseList>(standaloneConstruct.t); |
| |
| mlir::Location currentLocation = |
| converter.genLocation(standaloneDirective.source); |
| Fortran::lower::StatementContext stmtCtx; |
| |
| if (standaloneDirective.v == llvm::acc::Directive::ACCD_enter_data) { |
| genACCEnterDataOp(converter, currentLocation, semanticsContext, stmtCtx, |
| accClauseList); |
| } else if (standaloneDirective.v == llvm::acc::Directive::ACCD_exit_data) { |
| genACCExitDataOp(converter, currentLocation, semanticsContext, stmtCtx, |
| accClauseList); |
| } else if (standaloneDirective.v == llvm::acc::Directive::ACCD_init) { |
| genACCInitShutdownOp<mlir::acc::InitOp>(converter, currentLocation, |
| accClauseList); |
| } else if (standaloneDirective.v == llvm::acc::Directive::ACCD_shutdown) { |
| genACCInitShutdownOp<mlir::acc::ShutdownOp>(converter, currentLocation, |
| accClauseList); |
| } else if (standaloneDirective.v == llvm::acc::Directive::ACCD_set) { |
| genACCSetOp(converter, currentLocation, accClauseList); |
| } else if (standaloneDirective.v == llvm::acc::Directive::ACCD_update) { |
| genACCUpdateOp(converter, currentLocation, semanticsContext, stmtCtx, |
| accClauseList); |
| } |
| } |
| |
| static void genACC(Fortran::lower::AbstractConverter &converter, |
| const Fortran::parser::OpenACCWaitConstruct &waitConstruct) { |
| |
| const auto &waitArgument = |
| std::get<std::optional<Fortran::parser::AccWaitArgument>>( |
| waitConstruct.t); |
| const auto &accClauseList = |
| std::get<Fortran::parser::AccClauseList>(waitConstruct.t); |
| |
| mlir::Value ifCond, waitDevnum, async; |
| llvm::SmallVector<mlir::Value> waitOperands; |
| |
| // Async clause have optional values but can be present with |
| // no value as well. When there is no value, the op has an attribute to |
| // represent the clause. |
| bool addAsyncAttr = false; |
| |
| fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
| mlir::Location currentLocation = converter.genLocation(waitConstruct.source); |
| Fortran::lower::StatementContext stmtCtx; |
| |
| if (waitArgument) { // wait has a value. |
| const Fortran::parser::AccWaitArgument &waitArg = *waitArgument; |
| const auto &waitList = |
| std::get<std::list<Fortran::parser::ScalarIntExpr>>(waitArg.t); |
| for (const Fortran::parser::ScalarIntExpr &value : waitList) { |
| mlir::Value v = fir::getBase( |
| converter.genExprValue(*Fortran::semantics::GetExpr(value), stmtCtx)); |
| waitOperands.push_back(v); |
| } |
| |
| const auto &waitDevnumValue = |
| std::get<std::optional<Fortran::parser::ScalarIntExpr>>(waitArg.t); |
| if (waitDevnumValue) |
| waitDevnum = fir::getBase(converter.genExprValue( |
| *Fortran::semantics::GetExpr(*waitDevnumValue), stmtCtx)); |
| } |
| |
| // Lower clauses values mapped to operands. |
| // Keep track of each group of operands separately as clauses can appear |
| // more than once. |
| for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
| mlir::Location clauseLocation = converter.genLocation(clause.source); |
| if (const auto *ifClause = |
| std::get_if<Fortran::parser::AccClause::If>(&clause.u)) { |
| genIfClause(converter, clauseLocation, ifClause, ifCond, stmtCtx); |
| } else if (const auto *asyncClause = |
| std::get_if<Fortran::parser::AccClause::Async>(&clause.u)) { |
| genAsyncClause(converter, asyncClause, async, addAsyncAttr, stmtCtx); |
| } |
| } |
| |
| // Prepare the operand segment size attribute and the operands value range. |
| llvm::SmallVector<mlir::Value> operands; |
| llvm::SmallVector<int32_t> operandSegments; |
| addOperands(operands, operandSegments, waitOperands); |
| addOperand(operands, operandSegments, async); |
| addOperand(operands, operandSegments, waitDevnum); |
| addOperand(operands, operandSegments, ifCond); |
| |
| mlir::acc::WaitOp waitOp = createSimpleOp<mlir::acc::WaitOp>( |
| firOpBuilder, currentLocation, operands, operandSegments); |
| |
| if (addAsyncAttr) |
| waitOp.setAsyncAttr(firOpBuilder.getUnitAttr()); |
| } |
| |
| template <typename EntryOp> |
| static void createDeclareAllocFunc(mlir::OpBuilder &modBuilder, |
| fir::FirOpBuilder &builder, |
| mlir::Location loc, fir::GlobalOp &globalOp, |
| mlir::acc::DataClause clause) { |
| std::stringstream registerFuncName; |
| registerFuncName << globalOp.getSymName().str() |
| << Fortran::lower::declarePostAllocSuffix.str(); |
| auto registerFuncOp = |
| createDeclareFunc(modBuilder, builder, loc, registerFuncName.str()); |
| |
| fir::AddrOfOp addrOp = fir::AddrOfOp::create( |
| builder, loc, fir::ReferenceType::get(globalOp.getType()), |
| globalOp.getSymbol()); |
| |
| std::stringstream asFortran; |
| asFortran << Fortran::lower::mangle::demangleName(globalOp.getSymName()); |
| std::stringstream asFortranDesc; |
| asFortranDesc << asFortran.str(); |
| llvm::SmallVector<mlir::Value> bounds; |
| |
| EntryOp descEntryOp = createDataEntryOp<EntryOp>( |
| builder, loc, addrOp, asFortranDesc, bounds, |
| /*structured=*/false, /*implicit=*/true, clause, addrOp.getType(), |
| /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{}); |
| mlir::acc::DeclareEnterOp::create( |
| builder, loc, mlir::acc::DeclareTokenType::get(descEntryOp.getContext()), |
| mlir::ValueRange(descEntryOp.getAccVar())); |
| |
| modBuilder.setInsertionPointAfter(registerFuncOp); |
| } |
| |
| /// Action to be performed on deallocation are split in two distinct functions. |
| /// - Pre deallocation function includes all the action to be performed before |
| /// the actual deallocation is done on the host side. |
| /// - Post deallocation function includes update to the descriptor. |
| template <typename ExitOp> |
| static void createDeclareDeallocFunc(mlir::OpBuilder &modBuilder, |
| fir::FirOpBuilder &builder, |
| mlir::Location loc, |
| fir::GlobalOp &globalOp, |
| mlir::acc::DataClause clause) { |
| std::stringstream asFortran; |
| asFortran << Fortran::lower::mangle::demangleName(globalOp.getSymName()); |
| |
| std::stringstream postDeallocFuncName; |
| postDeallocFuncName << globalOp.getSymName().str() |
| << Fortran::lower::declarePostDeallocSuffix.str(); |
| auto postDeallocOp = |
| createDeclareFunc(modBuilder, builder, loc, postDeallocFuncName.str()); |
| |
| fir::AddrOfOp addrOp = fir::AddrOfOp::create( |
| builder, loc, fir::ReferenceType::get(globalOp.getType()), |
| globalOp.getSymbol()); |
| llvm::SmallVector<mlir::Value> bounds; |
| // End the structured declare region using declare_exit. |
| mlir::acc::GetDevicePtrOp descEntryOp = |
| createDataEntryOp<mlir::acc::GetDevicePtrOp>( |
| builder, loc, addrOp, asFortran, bounds, |
| /*structured=*/false, /*implicit=*/true, clause, addrOp.getType(), |
| /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{}); |
| mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{}, |
| mlir::ValueRange(descEntryOp.getAccVar())); |
| modBuilder.setInsertionPointAfter(postDeallocOp); |
| } |
| |
| template <typename EntryOp, typename ExitOp> |
| static void genGlobalCtors(Fortran::lower::AbstractConverter &converter, |
| mlir::OpBuilder &modBuilder, |
| const Fortran::parser::AccObjectList &accObjectList, |
| mlir::acc::DataClause clause) { |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| auto genCtors = [&](const mlir::Location operandLocation, |
| const Fortran::semantics::Symbol &symbol) { |
| std::string globalName = converter.mangleName(symbol); |
| fir::GlobalOp globalOp = builder.getNamedGlobal(globalName); |
| std::stringstream declareGlobalCtorName; |
| declareGlobalCtorName << globalName << "_acc_ctor"; |
| std::stringstream declareGlobalDtorName; |
| declareGlobalDtorName << globalName << "_acc_dtor"; |
| std::stringstream asFortran; |
| asFortran << symbol.name().ToString(); |
| |
| if (builder.getModule().lookupSymbol<mlir::acc::GlobalConstructorOp>( |
| declareGlobalCtorName.str())) |
| return; |
| |
| if (!globalOp) { |
| if (Fortran::semantics::FindEquivalenceSet(symbol)) { |
| for (Fortran::semantics::EquivalenceObject eqObj : |
| *Fortran::semantics::FindEquivalenceSet(symbol)) { |
| std::string eqName = converter.mangleName(eqObj.symbol); |
| globalOp = builder.getNamedGlobal(eqName); |
| if (globalOp) |
| break; |
| } |
| |
| if (!globalOp) |
| llvm::report_fatal_error("could not retrieve global symbol"); |
| } else { |
| llvm::report_fatal_error("could not retrieve global symbol"); |
| } |
| } |
| |
| addDeclareAttr(builder, globalOp.getOperation(), clause); |
| auto crtPos = builder.saveInsertionPoint(); |
| modBuilder.setInsertionPointAfter(globalOp); |
| if (mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(globalOp.getType()))) { |
| createDeclareGlobalOp<mlir::acc::GlobalConstructorOp, mlir::acc::CopyinOp, |
| mlir::acc::DeclareEnterOp, ExitOp>( |
| modBuilder, builder, operandLocation, globalOp, clause, |
| declareGlobalCtorName.str(), /*implicit=*/true, asFortran); |
| createDeclareAllocFunc<EntryOp>(modBuilder, builder, operandLocation, |
| globalOp, clause); |
| if constexpr (!std::is_same_v<EntryOp, ExitOp>) |
| createDeclareDeallocFunc<ExitOp>(modBuilder, builder, operandLocation, |
| globalOp, clause); |
| } else { |
| createDeclareGlobalOp<mlir::acc::GlobalConstructorOp, EntryOp, |
| mlir::acc::DeclareEnterOp, ExitOp>( |
| modBuilder, builder, operandLocation, globalOp, clause, |
| declareGlobalCtorName.str(), /*implicit=*/false, asFortran); |
| } |
| if constexpr (!std::is_same_v<EntryOp, ExitOp>) { |
| createDeclareGlobalOp<mlir::acc::GlobalDestructorOp, |
| mlir::acc::GetDevicePtrOp, mlir::acc::DeclareExitOp, |
| ExitOp>( |
| modBuilder, builder, operandLocation, globalOp, clause, |
| declareGlobalDtorName.str(), /*implicit=*/false, asFortran); |
| } |
| builder.restoreInsertionPoint(crtPos); |
| }; |
| for (const auto &accObject : accObjectList.v) { |
| mlir::Location operandLocation = genOperandLocation(converter, accObject); |
| Fortran::common::visit( |
| Fortran::common::visitors{ |
| [&](const Fortran::parser::Designator &designator) { |
| if (const auto *name = |
| Fortran::parser::GetDesignatorNameIfDataRef(designator)) { |
| genCtors(operandLocation, *name->symbol); |
| } |
| }, |
| [&](const Fortran::parser::Name &name) { |
| if (const auto *symbol = name.symbol) { |
| if (symbol |
| ->detailsIf<Fortran::semantics::CommonBlockDetails>()) { |
| genCtors(operandLocation, *symbol); |
| } else { |
| TODO(operandLocation, |
| "OpenACC Global Ctor from parser::Name"); |
| } |
| } |
| }}, |
| accObject.u); |
| } |
| } |
| |
| template <typename Clause, typename EntryOp, typename ExitOp> |
| static void |
| genGlobalCtorsWithModifier(Fortran::lower::AbstractConverter &converter, |
| mlir::OpBuilder &modBuilder, const Clause *x, |
| Fortran::parser::AccDataModifier::Modifier mod, |
| const mlir::acc::DataClause clause, |
| const mlir::acc::DataClause clauseWithModifier) { |
| const Fortran::parser::AccObjectListWithModifier &listWithModifier = x->v; |
| const auto &accObjectList = |
| std::get<Fortran::parser::AccObjectList>(listWithModifier.t); |
| const auto &modifier = |
| std::get<std::optional<Fortran::parser::AccDataModifier>>( |
| listWithModifier.t); |
| mlir::acc::DataClause dataClause = |
| (modifier && (*modifier).v == mod) ? clauseWithModifier : clause; |
| genGlobalCtors<EntryOp, ExitOp>(converter, modBuilder, accObjectList, |
| dataClause); |
| } |
| |
| static fir::GlobalOp |
| lookupGlobalBySymbolOrEquivalence(Fortran::lower::AbstractConverter &converter, |
| fir::FirOpBuilder &builder, |
| const Fortran::semantics::Symbol &sym) { |
| const Fortran::semantics::Symbol *commonBlock = |
| Fortran::semantics::FindCommonBlockContaining(sym); |
| std::string globalName = commonBlock ? converter.mangleName(*commonBlock) |
| : converter.mangleName(sym); |
| if (fir::GlobalOp g = builder.getNamedGlobal(globalName)) { |
| return g; |
| } |
| // Not found: if not a COMMON member, try equivalence members |
| if (!commonBlock) { |
| if (const Fortran::semantics::EquivalenceSet *eqSet = |
| Fortran::semantics::FindEquivalenceSet(sym)) { |
| for (const Fortran::semantics::EquivalenceObject &eqObj : *eqSet) { |
| std::string eqName = converter.mangleName(eqObj.symbol); |
| if (fir::GlobalOp g = builder.getNamedGlobal(eqName)) |
| return g; |
| } |
| } |
| } |
| return {}; |
| } |
| |
| template <typename EmitterFn> |
| static void emitCommonGlobal(Fortran::lower::AbstractConverter &converter, |
| fir::FirOpBuilder &builder, |
| const Fortran::parser::AccObject &obj, |
| mlir::acc::DataClause clause, |
| EmitterFn &&emitCtorDtor) { |
| Fortran::semantics::Symbol &sym = getSymbolFromAccObject(obj); |
| if (!(sym.detailsIf<Fortran::semantics::CommonBlockDetails>() || |
| Fortran::semantics::FindCommonBlockContaining(sym))) |
| return; |
| |
| fir::GlobalOp globalOp = |
| lookupGlobalBySymbolOrEquivalence(converter, builder, sym); |
| if (!globalOp) |
| llvm::report_fatal_error("could not retrieve global symbol"); |
| |
| std::stringstream ctorName; |
| ctorName << globalOp.getSymName().str() << "_acc_ctor"; |
| if (builder.getModule().lookupSymbol<mlir::acc::GlobalConstructorOp>( |
| ctorName.str())) |
| return; |
| |
| mlir::Location operandLocation = genOperandLocation(converter, obj); |
| addDeclareAttr(builder, globalOp.getOperation(), clause); |
| mlir::OpBuilder modBuilder(builder.getModule().getBodyRegion()); |
| modBuilder.setInsertionPointAfter(globalOp); |
| std::stringstream asFortran; |
| asFortran << sym.name().ToString(); |
| |
| auto savedIP = builder.saveInsertionPoint(); |
| emitCtorDtor(modBuilder, operandLocation, globalOp, clause, asFortran, |
| ctorName.str()); |
| builder.restoreInsertionPoint(savedIP); |
| } |
| |
| static void |
| genDeclareInFunction(Fortran::lower::AbstractConverter &converter, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::StatementContext &openAccCtx, |
| mlir::Location loc, |
| const Fortran::parser::AccClauseList &accClauseList) { |
| llvm::SmallVector<mlir::Value> dataClauseOperands, copyEntryOperands, |
| copyinEntryOperands, createEntryOperands, copyoutEntryOperands, |
| presentEntryOperands, deviceResidentEntryOperands; |
| Fortran::lower::StatementContext stmtCtx; |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| |
| for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
| if (const auto *copyClause = |
| std::get_if<Fortran::parser::AccClause::Copy>(&clause.u)) { |
| auto crtDataStart = dataClauseOperands.size(); |
| genDeclareDataOperandOperations<mlir::acc::CopyinOp, |
| mlir::acc::CopyoutOp>( |
| copyClause->v, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, mlir::acc::DataClause::acc_copy, |
| /*structured=*/true, /*implicit=*/false); |
| copyEntryOperands.append(dataClauseOperands.begin() + crtDataStart, |
| dataClauseOperands.end()); |
| } else if (const auto *createClause = |
| std::get_if<Fortran::parser::AccClause::Create>(&clause.u)) { |
| auto crtDataStart = dataClauseOperands.size(); |
| const auto &accObjectList = |
| std::get<Fortran::parser::AccObjectList>(createClause->v.t); |
| genDeclareDataOperandOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>( |
| accObjectList, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, mlir::acc::DataClause::acc_create, |
| /*structured=*/true, /*implicit=*/false); |
| createEntryOperands.append(dataClauseOperands.begin() + crtDataStart, |
| dataClauseOperands.end()); |
| } else if (const auto *presentClause = |
| std::get_if<Fortran::parser::AccClause::Present>( |
| &clause.u)) { |
| auto crtDataStart = dataClauseOperands.size(); |
| genDeclareDataOperandOperations<mlir::acc::PresentOp, |
| mlir::acc::DeleteOp>( |
| presentClause->v, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, mlir::acc::DataClause::acc_present, |
| /*structured=*/true, /*implicit=*/false); |
| presentEntryOperands.append(dataClauseOperands.begin() + crtDataStart, |
| dataClauseOperands.end()); |
| } else if (const auto *copyinClause = |
| std::get_if<Fortran::parser::AccClause::Copyin>(&clause.u)) { |
| auto crtDataStart = dataClauseOperands.size(); |
| genDeclareDataOperandOperationsWithModifier<mlir::acc::CopyinOp, |
| mlir::acc::DeleteOp>( |
| copyinClause, converter, semanticsContext, stmtCtx, |
| Fortran::parser::AccDataModifier::Modifier::ReadOnly, |
| dataClauseOperands, mlir::acc::DataClause::acc_copyin, |
| mlir::acc::DataClause::acc_copyin_readonly); |
| copyinEntryOperands.append(dataClauseOperands.begin() + crtDataStart, |
| dataClauseOperands.end()); |
| } else if (const auto *copyoutClause = |
| std::get_if<Fortran::parser::AccClause::Copyout>( |
| &clause.u)) { |
| auto crtDataStart = dataClauseOperands.size(); |
| const auto &accObjectList = |
| std::get<Fortran::parser::AccObjectList>(copyoutClause->v.t); |
| genDeclareDataOperandOperations<mlir::acc::CreateOp, |
| mlir::acc::CopyoutOp>( |
| accObjectList, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, mlir::acc::DataClause::acc_copyout, |
| /*structured=*/true, /*implicit=*/false); |
| copyoutEntryOperands.append(dataClauseOperands.begin() + crtDataStart, |
| dataClauseOperands.end()); |
| } else if (const auto *devicePtrClause = |
| std::get_if<Fortran::parser::AccClause::Deviceptr>( |
| &clause.u)) { |
| genDeclareDataOperandOperations<mlir::acc::DevicePtrOp, |
| mlir::acc::DevicePtrOp>( |
| devicePtrClause->v, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, mlir::acc::DataClause::acc_deviceptr, |
| /*structured=*/true, /*implicit=*/false); |
| } else if (const auto *linkClause = |
| std::get_if<Fortran::parser::AccClause::Link>(&clause.u)) { |
| genDeclareDataOperandOperations<mlir::acc::DeclareLinkOp, |
| mlir::acc::DeclareLinkOp>( |
| linkClause->v, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, mlir::acc::DataClause::acc_declare_link, |
| /*structured=*/true, /*implicit=*/false); |
| } else if (const auto *deviceResidentClause = |
| std::get_if<Fortran::parser::AccClause::DeviceResident>( |
| &clause.u)) { |
| auto crtDataStart = dataClauseOperands.size(); |
| genDeclareDataOperandOperations<mlir::acc::DeclareDeviceResidentOp, |
| mlir::acc::DeleteOp>( |
| deviceResidentClause->v, converter, semanticsContext, stmtCtx, |
| dataClauseOperands, |
| mlir::acc::DataClause::acc_declare_device_resident, |
| /*structured=*/true, /*implicit=*/false); |
| deviceResidentEntryOperands.append( |
| dataClauseOperands.begin() + crtDataStart, dataClauseOperands.end()); |
| } else { |
| mlir::Location clauseLocation = converter.genLocation(clause.source); |
| TODO(clauseLocation, "clause on declare directive"); |
| } |
| } |
| |
| // If no structured operands were generated (all objects were COMMON), |
| // do not create a declare region. |
| if (dataClauseOperands.empty()) |
| return; |
| |
| mlir::func::FuncOp funcOp = builder.getFunction(); |
| auto ops = funcOp.getOps<mlir::acc::DeclareEnterOp>(); |
| mlir::Value declareToken; |
| if (ops.empty()) { |
| declareToken = mlir::acc::DeclareEnterOp::create( |
| builder, loc, mlir::acc::DeclareTokenType::get(builder.getContext()), |
| dataClauseOperands); |
| } else { |
| auto declareOp = *ops.begin(); |
| auto newDeclareOp = mlir::acc::DeclareEnterOp::create( |
| builder, loc, mlir::acc::DeclareTokenType::get(builder.getContext()), |
| declareOp.getDataClauseOperands()); |
| newDeclareOp.getDataClauseOperandsMutable().append(dataClauseOperands); |
| declareToken = newDeclareOp.getToken(); |
| declareOp.erase(); |
| } |
| |
| openAccCtx.attachCleanup([&builder, loc, createEntryOperands, |
| copyEntryOperands, copyinEntryOperands, |
| copyoutEntryOperands, presentEntryOperands, |
| deviceResidentEntryOperands, declareToken]() { |
| llvm::SmallVector<mlir::Value> operands; |
| operands.append(createEntryOperands); |
| operands.append(deviceResidentEntryOperands); |
| operands.append(copyEntryOperands); |
| operands.append(copyinEntryOperands); |
| operands.append(copyoutEntryOperands); |
| operands.append(presentEntryOperands); |
| |
| mlir::func::FuncOp funcOp = builder.getFunction(); |
| auto ops = funcOp.getOps<mlir::acc::DeclareExitOp>(); |
| if (ops.empty()) { |
| mlir::acc::DeclareExitOp::create(builder, loc, declareToken, operands); |
| } else { |
| auto declareOp = *ops.begin(); |
| declareOp.getDataClauseOperandsMutable().append(operands); |
| } |
| |
| genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>( |
| builder, createEntryOperands, /*structured=*/true); |
| genDataExitOperations<mlir::acc::DeclareDeviceResidentOp, |
| mlir::acc::DeleteOp>( |
| builder, deviceResidentEntryOperands, /*structured=*/true); |
| genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>( |
| builder, copyEntryOperands, /*structured=*/true); |
| genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::DeleteOp>( |
| builder, copyinEntryOperands, /*structured=*/true); |
| genDataExitOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>( |
| builder, copyoutEntryOperands, /*structured=*/true); |
| genDataExitOperations<mlir::acc::PresentOp, mlir::acc::DeleteOp>( |
| builder, presentEntryOperands, /*structured=*/true); |
| }); |
| } |
| |
| static void |
| genDeclareInModule(Fortran::lower::AbstractConverter &converter, |
| mlir::ModuleOp moduleOp, |
| const Fortran::parser::AccClauseList &accClauseList) { |
| mlir::OpBuilder modBuilder(moduleOp.getBodyRegion()); |
| for (const Fortran::parser::AccClause &clause : accClauseList.v) { |
| if (const auto *createClause = |
| std::get_if<Fortran::parser::AccClause::Create>(&clause.u)) { |
| const Fortran::parser::AccObjectListWithModifier &listWithModifier = |
| createClause->v; |
| const auto &accObjectList = |
| std::get<Fortran::parser::AccObjectList>(listWithModifier.t); |
| genGlobalCtors<mlir::acc::CreateOp, mlir::acc::DeleteOp>( |
| converter, modBuilder, accObjectList, |
| mlir::acc::DataClause::acc_create); |
| } else if (const auto *copyinClause = |
| std::get_if<Fortran::parser::AccClause::Copyin>(&clause.u)) { |
| genGlobalCtorsWithModifier<Fortran::parser::AccClause::Copyin, |
| mlir::acc::CopyinOp, mlir::acc::DeleteOp>( |
| converter, modBuilder, copyinClause, |
| Fortran::parser::AccDataModifier::Modifier::ReadOnly, |
| mlir::acc::DataClause::acc_copyin, |
| mlir::acc::DataClause::acc_copyin_readonly); |
| } else if (const auto *deviceResidentClause = |
| std::get_if<Fortran::parser::AccClause::DeviceResident>( |
| &clause.u)) { |
| genGlobalCtors<mlir::acc::DeclareDeviceResidentOp, mlir::acc::DeleteOp>( |
| converter, modBuilder, deviceResidentClause->v, |
| mlir::acc::DataClause::acc_declare_device_resident); |
| } else if (const auto *linkClause = |
| std::get_if<Fortran::parser::AccClause::Link>(&clause.u)) { |
| genGlobalCtors<mlir::acc::DeclareLinkOp, mlir::acc::DeclareLinkOp>( |
| converter, modBuilder, linkClause->v, |
| mlir::acc::DataClause::acc_declare_link); |
| } else { |
| llvm::report_fatal_error("unsupported clause on DECLARE directive"); |
| } |
| } |
| } |
| |
| static void genACC(Fortran::lower::AbstractConverter &converter, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::StatementContext &openAccCtx, |
| const Fortran::parser::OpenACCStandaloneDeclarativeConstruct |
| &declareConstruct) { |
| |
| const auto &declarativeDir = |
| std::get<Fortran::parser::AccDeclarativeDirective>(declareConstruct.t); |
| mlir::Location directiveLocation = |
| converter.genLocation(declarativeDir.source); |
| const auto &accClauseList = |
| std::get<Fortran::parser::AccClauseList>(declareConstruct.t); |
| |
| if (declarativeDir.v == llvm::acc::Directive::ACCD_declare) { |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| auto moduleOp = |
| builder.getBlock()->getParent()->getParentOfType<mlir::ModuleOp>(); |
| auto funcOp = |
| builder.getBlock()->getParent()->getParentOfType<mlir::func::FuncOp>(); |
| if (funcOp) |
| genDeclareInFunction(converter, semanticsContext, openAccCtx, |
| directiveLocation, accClauseList); |
| else if (moduleOp) |
| genDeclareInModule(converter, moduleOp, accClauseList); |
| return; |
| } |
| llvm_unreachable("unsupported declarative directive"); |
| } |
| |
| static bool hasDeviceType(llvm::SmallVector<mlir::Attribute> &arrayAttr, |
| mlir::acc::DeviceType deviceType) { |
| for (auto attr : arrayAttr) { |
| auto deviceTypeAttr = mlir::dyn_cast<mlir::acc::DeviceTypeAttr>(attr); |
| if (deviceTypeAttr.getValue() == deviceType) |
| return true; |
| } |
| return false; |
| } |
| |
| template <typename RetTy, typename AttrTy> |
| static std::optional<RetTy> |
| getAttributeValueByDeviceType(llvm::SmallVector<mlir::Attribute> &attributes, |
| llvm::SmallVector<mlir::Attribute> &deviceTypes, |
| mlir::acc::DeviceType deviceType) { |
| assert(attributes.size() == deviceTypes.size() && |
| "expect same number of attributes"); |
| for (auto it : llvm::enumerate(deviceTypes)) { |
| auto deviceTypeAttr = mlir::dyn_cast<mlir::acc::DeviceTypeAttr>(it.value()); |
| if (deviceTypeAttr.getValue() == deviceType) { |
| if constexpr (std::is_same_v<mlir::StringAttr, AttrTy>) { |
| auto strAttr = mlir::dyn_cast<AttrTy>(attributes[it.index()]); |
| return strAttr.getValue(); |
| } else if constexpr (std::is_same_v<mlir::IntegerAttr, AttrTy>) { |
| auto intAttr = |
| mlir::dyn_cast<mlir::IntegerAttr>(attributes[it.index()]); |
| return intAttr.getInt(); |
| } |
| } |
| } |
| return std::nullopt; |
| } |
| |
| // Helper function to extract string value from bind name variant |
| static std::optional<llvm::StringRef> getBindNameStringValue( |
| const std::optional<std::variant<mlir::SymbolRefAttr, mlir::StringAttr>> |
| &bindNameValue) { |
| if (!bindNameValue.has_value()) |
| return std::nullopt; |
| |
| return std::visit( |
| [](const auto &attr) -> std::optional<llvm::StringRef> { |
| if constexpr (std::is_same_v<std::decay_t<decltype(attr)>, |
| mlir::StringAttr>) { |
| return attr.getValue(); |
| } else if constexpr (std::is_same_v<std::decay_t<decltype(attr)>, |
| mlir::SymbolRefAttr>) { |
| return attr.getLeafReference(); |
| } else { |
| return std::nullopt; |
| } |
| }, |
| bindNameValue.value()); |
| } |
| |
| static bool compareDeviceTypeInfo( |
| mlir::acc::RoutineOp op, |
| llvm::SmallVector<mlir::Attribute> &bindIdNameArrayAttr, |
| llvm::SmallVector<mlir::Attribute> &bindStrNameArrayAttr, |
| llvm::SmallVector<mlir::Attribute> &bindIdNameDeviceTypeArrayAttr, |
| llvm::SmallVector<mlir::Attribute> &bindStrNameDeviceTypeArrayAttr, |
| llvm::SmallVector<mlir::Attribute> &gangArrayAttr, |
| llvm::SmallVector<mlir::Attribute> &gangDimArrayAttr, |
| llvm::SmallVector<mlir::Attribute> &gangDimDeviceTypeArrayAttr, |
| llvm::SmallVector<mlir::Attribute> &seqArrayAttr, |
| llvm::SmallVector<mlir::Attribute> &workerArrayAttr, |
| llvm::SmallVector<mlir::Attribute> &vectorArrayAttr) { |
| for (uint32_t dtypeInt = 0; |
| dtypeInt != mlir::acc::getMaxEnumValForDeviceType(); ++dtypeInt) { |
| auto dtype = static_cast<mlir::acc::DeviceType>(dtypeInt); |
| auto bindNameValue = getBindNameStringValue(op.getBindNameValue(dtype)); |
| if (bindNameValue != |
| getAttributeValueByDeviceType<llvm::StringRef, mlir::StringAttr>( |
| bindIdNameArrayAttr, bindIdNameDeviceTypeArrayAttr, dtype) && |
| bindNameValue != |
| getAttributeValueByDeviceType<llvm::StringRef, mlir::StringAttr>( |
| bindStrNameArrayAttr, bindStrNameDeviceTypeArrayAttr, dtype)) |
| return false; |
| if (op.hasGang(dtype) != hasDeviceType(gangArrayAttr, dtype)) |
| return false; |
| if (op.getGangDimValue(dtype) != |
| getAttributeValueByDeviceType<int64_t, mlir::IntegerAttr>( |
| gangDimArrayAttr, gangDimDeviceTypeArrayAttr, dtype)) |
| return false; |
| if (op.hasSeq(dtype) != hasDeviceType(seqArrayAttr, dtype)) |
| return false; |
| if (op.hasWorker(dtype) != hasDeviceType(workerArrayAttr, dtype)) |
| return false; |
| if (op.hasVector(dtype) != hasDeviceType(vectorArrayAttr, dtype)) |
| return false; |
| } |
| return true; |
| } |
| |
| static void attachRoutineInfo(mlir::func::FuncOp func, |
| mlir::SymbolRefAttr routineAttr) { |
| llvm::SmallVector<mlir::SymbolRefAttr> routines; |
| if (func.getOperation()->hasAttr(mlir::acc::getRoutineInfoAttrName())) { |
| auto routineInfo = |
| func.getOperation()->getAttrOfType<mlir::acc::RoutineInfoAttr>( |
| mlir::acc::getRoutineInfoAttrName()); |
| routines.append(routineInfo.getAccRoutines().begin(), |
| routineInfo.getAccRoutines().end()); |
| } |
| routines.push_back(routineAttr); |
| func.getOperation()->setAttr( |
| mlir::acc::getRoutineInfoAttrName(), |
| mlir::acc::RoutineInfoAttr::get(func.getContext(), routines)); |
| } |
| |
| static mlir::ArrayAttr |
| getArrayAttrOrNull(fir::FirOpBuilder &builder, |
| llvm::SmallVector<mlir::Attribute> &attributes) { |
| if (attributes.empty()) { |
| return nullptr; |
| } else { |
| return builder.getArrayAttr(attributes); |
| } |
| } |
| |
| void createOpenACCRoutineConstruct( |
| Fortran::lower::AbstractConverter &converter, mlir::Location loc, |
| mlir::ModuleOp mod, mlir::func::FuncOp funcOp, std::string funcName, |
| bool hasNohost, llvm::SmallVector<mlir::Attribute> &bindIdNames, |
| llvm::SmallVector<mlir::Attribute> &bindStrNames, |
| llvm::SmallVector<mlir::Attribute> &bindIdNameDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &bindStrNameDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &gangDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &gangDimValues, |
| llvm::SmallVector<mlir::Attribute> &gangDimDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &seqDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &workerDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &vectorDeviceTypes) { |
| |
| for (auto routineOp : mod.getOps<mlir::acc::RoutineOp>()) { |
| if (routineOp.getFuncName().getLeafReference().str().compare(funcName) == |
| 0) { |
| // If the routine is already specified with the same clauses, just skip |
| // the operation creation. |
| if (compareDeviceTypeInfo(routineOp, bindIdNames, bindStrNames, |
| bindIdNameDeviceTypes, bindStrNameDeviceTypes, |
| gangDeviceTypes, gangDimValues, |
| gangDimDeviceTypes, seqDeviceTypes, |
| workerDeviceTypes, vectorDeviceTypes) && |
| routineOp.getNohost() == hasNohost) |
| return; |
| mlir::emitError(loc, "Routine already specified with different clauses"); |
| } |
| } |
| std::stringstream routineOpName; |
| routineOpName << accRoutinePrefix.str() << routineCounter++; |
| std::string routineOpStr = routineOpName.str(); |
| mlir::OpBuilder modBuilder(mod.getBodyRegion()); |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| mlir::acc::RoutineOp::create( |
| modBuilder, loc, routineOpStr, |
| mlir::SymbolRefAttr::get(builder.getContext(), funcName), |
| getArrayAttrOrNull(builder, bindIdNames), |
| getArrayAttrOrNull(builder, bindStrNames), |
| getArrayAttrOrNull(builder, bindIdNameDeviceTypes), |
| getArrayAttrOrNull(builder, bindStrNameDeviceTypes), |
| getArrayAttrOrNull(builder, workerDeviceTypes), |
| getArrayAttrOrNull(builder, vectorDeviceTypes), |
| getArrayAttrOrNull(builder, seqDeviceTypes), hasNohost, |
| /*implicit=*/false, getArrayAttrOrNull(builder, gangDeviceTypes), |
| getArrayAttrOrNull(builder, gangDimValues), |
| getArrayAttrOrNull(builder, gangDimDeviceTypes)); |
| |
| attachRoutineInfo(funcOp, builder.getSymbolRefAttr(routineOpStr)); |
| } |
| |
| static void interpretRoutineDeviceInfo( |
| Fortran::lower::AbstractConverter &converter, |
| const Fortran::semantics::OpenACCRoutineDeviceTypeInfo &dinfo, |
| llvm::SmallVector<mlir::Attribute> &seqDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &vectorDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &workerDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &bindIdNameDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &bindStrNameDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &bindIdNames, |
| llvm::SmallVector<mlir::Attribute> &bindStrNames, |
| llvm::SmallVector<mlir::Attribute> &gangDeviceTypes, |
| llvm::SmallVector<mlir::Attribute> &gangDimValues, |
| llvm::SmallVector<mlir::Attribute> &gangDimDeviceTypes) { |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| auto getDeviceTypeAttr = [&]() -> mlir::Attribute { |
| auto context = builder.getContext(); |
| auto value = getDeviceType(dinfo.dType()); |
| return mlir::acc::DeviceTypeAttr::get(context, value); |
| }; |
| if (dinfo.isSeq()) { |
| seqDeviceTypes.push_back(getDeviceTypeAttr()); |
| } |
| if (dinfo.isVector()) { |
| vectorDeviceTypes.push_back(getDeviceTypeAttr()); |
| } |
| if (dinfo.isWorker()) { |
| workerDeviceTypes.push_back(getDeviceTypeAttr()); |
| } |
| if (dinfo.isGang()) { |
| unsigned gangDim = dinfo.gangDim(); |
| auto deviceType = getDeviceTypeAttr(); |
| if (!gangDim) { |
| gangDeviceTypes.push_back(deviceType); |
| } else { |
| gangDimValues.push_back( |
| builder.getIntegerAttr(builder.getI64Type(), gangDim)); |
| gangDimDeviceTypes.push_back(deviceType); |
| } |
| } |
| if (dinfo.bindNameOpt().has_value()) { |
| const auto &bindName = dinfo.bindNameOpt().value(); |
| mlir::Attribute bindNameAttr; |
| if (const auto &bindSym{ |
| std::get_if<Fortran::semantics::SymbolRef>(&bindName)}) { |
| bindNameAttr = builder.getSymbolRefAttr(converter.mangleName(*bindSym)); |
| bindIdNames.push_back(bindNameAttr); |
| bindIdNameDeviceTypes.push_back(getDeviceTypeAttr()); |
| } else if (const auto &bindStr{std::get_if<std::string>(&bindName)}) { |
| bindNameAttr = builder.getStringAttr(*bindStr); |
| bindStrNames.push_back(bindNameAttr); |
| bindStrNameDeviceTypes.push_back(getDeviceTypeAttr()); |
| } else { |
| llvm_unreachable("Unsupported bind name type"); |
| } |
| } |
| } |
| |
| void Fortran::lower::genOpenACCRoutineConstruct( |
| Fortran::lower::AbstractConverter &converter, mlir::ModuleOp mod, |
| mlir::func::FuncOp funcOp, |
| const std::vector<Fortran::semantics::OpenACCRoutineInfo> &routineInfos) { |
| CHECK(funcOp && "Expected a valid function operation"); |
| mlir::Location loc{funcOp.getLoc()}; |
| std::string funcName{funcOp.getName()}; |
| |
| // Collect the routine clauses |
| bool hasNohost{false}; |
| |
| llvm::SmallVector<mlir::Attribute> seqDeviceTypes, vectorDeviceTypes, |
| workerDeviceTypes, bindIdNameDeviceTypes, bindStrNameDeviceTypes, |
| bindIdNames, bindStrNames, gangDeviceTypes, gangDimDeviceTypes, |
| gangDimValues; |
| |
| for (const Fortran::semantics::OpenACCRoutineInfo &info : routineInfos) { |
| // Device Independent Attributes |
| if (info.isNohost()) { |
| hasNohost = true; |
| } |
| // Note: Device Independent Attributes are set to the |
| // none device type in `info`. |
| interpretRoutineDeviceInfo( |
| converter, info, seqDeviceTypes, vectorDeviceTypes, workerDeviceTypes, |
| bindIdNameDeviceTypes, bindStrNameDeviceTypes, bindIdNames, |
| bindStrNames, gangDeviceTypes, gangDimValues, gangDimDeviceTypes); |
| |
| // Device Dependent Attributes |
| for (const Fortran::semantics::OpenACCRoutineDeviceTypeInfo &dinfo : |
| info.deviceTypeInfos()) { |
| interpretRoutineDeviceInfo(converter, dinfo, seqDeviceTypes, |
| vectorDeviceTypes, workerDeviceTypes, |
| bindIdNameDeviceTypes, bindStrNameDeviceTypes, |
| bindIdNames, bindStrNames, gangDeviceTypes, |
| gangDimValues, gangDimDeviceTypes); |
| } |
| } |
| createOpenACCRoutineConstruct( |
| converter, loc, mod, funcOp, funcName, hasNohost, bindIdNames, |
| bindStrNames, bindIdNameDeviceTypes, bindStrNameDeviceTypes, |
| gangDeviceTypes, gangDimValues, gangDimDeviceTypes, seqDeviceTypes, |
| workerDeviceTypes, vectorDeviceTypes); |
| } |
| |
| static void |
| genACC(Fortran::lower::AbstractConverter &converter, |
| Fortran::lower::pft::Evaluation &eval, |
| const Fortran::parser::OpenACCAtomicConstruct &atomicConstruct) { |
| |
| mlir::Location loc = converter.genLocation(atomicConstruct.source); |
| Fortran::common::visit( |
| Fortran::common::visitors{ |
| [&](const Fortran::parser::AccAtomicRead &atomicRead) { |
| genAtomicRead(converter, atomicRead, loc); |
| }, |
| [&](const Fortran::parser::AccAtomicWrite &atomicWrite) { |
| genAtomicWrite(converter, atomicWrite, loc); |
| }, |
| [&](const Fortran::parser::AccAtomicUpdate &atomicUpdate) { |
| genAtomicUpdate(converter, atomicUpdate, loc); |
| }, |
| [&](const Fortran::parser::AccAtomicCapture &atomicCapture) { |
| genAtomicCapture(converter, atomicCapture, loc); |
| }, |
| }, |
| atomicConstruct.u); |
| } |
| |
| static void |
| genACC(Fortran::lower::AbstractConverter &converter, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| const Fortran::parser::OpenACCCacheConstruct &cacheConstruct) { |
| mlir::Location loc = converter.genLocation(cacheConstruct.source); |
| TODO(loc, "OpenACC cache directive"); |
| } |
| |
| mlir::Value Fortran::lower::genOpenACCConstruct( |
| Fortran::lower::AbstractConverter &converter, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::pft::Evaluation &eval, |
| const Fortran::parser::OpenACCConstruct &accConstruct, |
| Fortran::lower::SymMap &localSymbols) { |
| |
| mlir::Value exitCond; |
| Fortran::common::visit( |
| common::visitors{ |
| [&](const Fortran::parser::OpenACCBlockConstruct &blockConstruct) { |
| genACC(converter, semanticsContext, eval, blockConstruct, |
| localSymbols); |
| }, |
| [&](const Fortran::parser::OpenACCCombinedConstruct |
| &combinedConstruct) { |
| genACC(converter, semanticsContext, eval, combinedConstruct); |
| }, |
| [&](const Fortran::parser::OpenACCLoopConstruct &loopConstruct) { |
| exitCond = genACC(converter, semanticsContext, eval, loopConstruct); |
| }, |
| [&](const Fortran::parser::OpenACCStandaloneConstruct |
| &standaloneConstruct) { |
| genACC(converter, semanticsContext, standaloneConstruct); |
| }, |
| [&](const Fortran::parser::OpenACCCacheConstruct &cacheConstruct) { |
| genACC(converter, semanticsContext, cacheConstruct); |
| }, |
| [&](const Fortran::parser::OpenACCWaitConstruct &waitConstruct) { |
| genACC(converter, waitConstruct); |
| }, |
| [&](const Fortran::parser::OpenACCAtomicConstruct &atomicConstruct) { |
| genACC(converter, eval, atomicConstruct); |
| }, |
| [&](const Fortran::parser::OpenACCEndConstruct &) { |
| // No op |
| }, |
| }, |
| accConstruct.u); |
| return exitCond; |
| } |
| |
| void Fortran::lower::genOpenACCDeclarativeConstruct( |
| Fortran::lower::AbstractConverter &converter, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::StatementContext &openAccCtx, |
| const Fortran::parser::OpenACCDeclarativeConstruct &accDeclConstruct) { |
| |
| Fortran::common::visit( |
| common::visitors{ |
| [&](const Fortran::parser::OpenACCStandaloneDeclarativeConstruct |
| &standaloneDeclarativeConstruct) { |
| genACC(converter, semanticsContext, openAccCtx, |
| standaloneDeclarativeConstruct); |
| }, |
| [&](const Fortran::parser::OpenACCRoutineConstruct &x) {}, |
| }, |
| accDeclConstruct.u); |
| } |
| |
| void Fortran::lower::attachDeclarePostAllocAction( |
| AbstractConverter &converter, fir::FirOpBuilder &builder, |
| const Fortran::semantics::Symbol &sym) { |
| std::stringstream fctName; |
| fctName << converter.mangleName(sym) << declarePostAllocSuffix.str(); |
| mlir::Operation *op = &builder.getInsertionBlock()->back(); |
| |
| if (auto resOp = mlir::dyn_cast<fir::ResultOp>(*op)) { |
| assert(resOp.getOperands().size() == 0 && |
| "expect only fir.result op with no operand"); |
| op = op->getPrevNode(); |
| } |
| assert(op && "expect operation to attach the post allocation action"); |
| |
| if (op->hasAttr(mlir::acc::getDeclareActionAttrName())) { |
| auto attr = op->getAttrOfType<mlir::acc::DeclareActionAttr>( |
| mlir::acc::getDeclareActionAttrName()); |
| op->setAttr(mlir::acc::getDeclareActionAttrName(), |
| mlir::acc::DeclareActionAttr::get( |
| builder.getContext(), attr.getPreAlloc(), |
| /*postAlloc=*/builder.getSymbolRefAttr(fctName.str()), |
| attr.getPreDealloc(), attr.getPostDealloc())); |
| } else { |
| op->setAttr(mlir::acc::getDeclareActionAttrName(), |
| mlir::acc::DeclareActionAttr::get( |
| builder.getContext(), |
| /*preAlloc=*/{}, |
| /*postAlloc=*/builder.getSymbolRefAttr(fctName.str()), |
| /*preDealloc=*/{}, /*postDealloc=*/{})); |
| } |
| } |
| |
| void Fortran::lower::attachDeclarePreDeallocAction( |
| AbstractConverter &converter, fir::FirOpBuilder &builder, |
| mlir::Value beginOpValue, const Fortran::semantics::Symbol &sym) { |
| if (!sym.test(Fortran::semantics::Symbol::Flag::AccCreate) && |
| !sym.test(Fortran::semantics::Symbol::Flag::AccCopyIn) && |
| !sym.test(Fortran::semantics::Symbol::Flag::AccCopyInReadOnly) && |
| !sym.test(Fortran::semantics::Symbol::Flag::AccCopy) && |
| !sym.test(Fortran::semantics::Symbol::Flag::AccCopyOut) && |
| !sym.test(Fortran::semantics::Symbol::Flag::AccDeviceResident)) |
| return; |
| |
| std::stringstream fctName; |
| fctName << converter.mangleName(sym) << declarePreDeallocSuffix.str(); |
| |
| auto *op = beginOpValue.getDefiningOp(); |
| if (op->hasAttr(mlir::acc::getDeclareActionAttrName())) { |
| auto attr = op->getAttrOfType<mlir::acc::DeclareActionAttr>( |
| mlir::acc::getDeclareActionAttrName()); |
| op->setAttr(mlir::acc::getDeclareActionAttrName(), |
| mlir::acc::DeclareActionAttr::get( |
| builder.getContext(), attr.getPreAlloc(), |
| attr.getPostAlloc(), |
| /*preDealloc=*/builder.getSymbolRefAttr(fctName.str()), |
| attr.getPostDealloc())); |
| } else { |
| op->setAttr(mlir::acc::getDeclareActionAttrName(), |
| mlir::acc::DeclareActionAttr::get( |
| builder.getContext(), |
| /*preAlloc=*/{}, /*postAlloc=*/{}, |
| /*preDealloc=*/builder.getSymbolRefAttr(fctName.str()), |
| /*postDealloc=*/{})); |
| } |
| } |
| |
| void Fortran::lower::attachDeclarePostDeallocAction( |
| AbstractConverter &converter, fir::FirOpBuilder &builder, |
| const Fortran::semantics::Symbol &sym) { |
| if (!sym.test(Fortran::semantics::Symbol::Flag::AccCreate) && |
| !sym.test(Fortran::semantics::Symbol::Flag::AccCopyIn) && |
| !sym.test(Fortran::semantics::Symbol::Flag::AccCopyInReadOnly) && |
| !sym.test(Fortran::semantics::Symbol::Flag::AccCopy) && |
| !sym.test(Fortran::semantics::Symbol::Flag::AccCopyOut) && |
| !sym.test(Fortran::semantics::Symbol::Flag::AccDeviceResident)) |
| return; |
| |
| std::stringstream fctName; |
| fctName << converter.mangleName(sym) << declarePostDeallocSuffix.str(); |
| mlir::Operation *op = &builder.getInsertionBlock()->back(); |
| if (auto resOp = mlir::dyn_cast<fir::ResultOp>(*op)) { |
| assert(resOp.getOperands().size() == 0 && |
| "expect only fir.result op with no operand"); |
| op = op->getPrevNode(); |
| } |
| assert(op && "expect operation to attach the post deallocation action"); |
| if (op->hasAttr(mlir::acc::getDeclareActionAttrName())) { |
| auto attr = op->getAttrOfType<mlir::acc::DeclareActionAttr>( |
| mlir::acc::getDeclareActionAttrName()); |
| op->setAttr(mlir::acc::getDeclareActionAttrName(), |
| mlir::acc::DeclareActionAttr::get( |
| builder.getContext(), attr.getPreAlloc(), |
| attr.getPostAlloc(), attr.getPreDealloc(), |
| /*postDealloc=*/builder.getSymbolRefAttr(fctName.str()))); |
| } else { |
| op->setAttr(mlir::acc::getDeclareActionAttrName(), |
| mlir::acc::DeclareActionAttr::get( |
| builder.getContext(), |
| /*preAlloc=*/{}, /*postAlloc=*/{}, /*preDealloc=*/{}, |
| /*postDealloc=*/builder.getSymbolRefAttr(fctName.str()))); |
| } |
| } |
| |
| void Fortran::lower::genOpenACCTerminator(fir::FirOpBuilder &builder, |
| mlir::Operation *op, |
| mlir::Location loc) { |
| if (mlir::isa<mlir::acc::ParallelOp, mlir::acc::LoopOp>(op)) |
| mlir::acc::YieldOp::create(builder, loc); |
| else |
| mlir::acc::TerminatorOp::create(builder, loc); |
| } |
| |
| bool Fortran::lower::isInOpenACCLoop(fir::FirOpBuilder &builder) { |
| if (builder.getBlock()->getParent()->getParentOfType<mlir::acc::LoopOp>()) |
| return true; |
| return false; |
| } |
| |
| bool Fortran::lower::isInsideOpenACCComputeConstruct( |
| fir::FirOpBuilder &builder) { |
| return mlir::isa_and_nonnull<ACC_COMPUTE_CONSTRUCT_OPS>( |
| mlir::acc::getEnclosingComputeOp(builder.getRegion())); |
| } |
| |
| void Fortran::lower::setInsertionPointAfterOpenACCLoopIfInside( |
| fir::FirOpBuilder &builder) { |
| if (auto loopOp = |
| builder.getBlock()->getParent()->getParentOfType<mlir::acc::LoopOp>()) |
| builder.setInsertionPointAfter(loopOp); |
| } |
| |
| void Fortran::lower::genEarlyReturnInOpenACCLoop(fir::FirOpBuilder &builder, |
| mlir::Location loc) { |
| mlir::Value yieldValue = |
| builder.createIntegerConstant(loc, builder.getI1Type(), 1); |
| mlir::acc::YieldOp::create(builder, loc, yieldValue); |
| } |
| |
| uint64_t Fortran::lower::getLoopCountForCollapseAndTile( |
| const Fortran::parser::AccClauseList &clauseList) { |
| uint64_t collapseLoopCount = getCollapseSizeAndForce(clauseList).first; |
| uint64_t tileLoopCount = 1; |
| for (const Fortran::parser::AccClause &clause : clauseList.v) { |
| if (const auto *tileClause = |
| std::get_if<Fortran::parser::AccClause::Tile>(&clause.u)) { |
| const parser::AccTileExprList &tileExprList = tileClause->v; |
| tileLoopCount = tileExprList.v.size(); |
| } |
| } |
| return tileLoopCount > collapseLoopCount ? tileLoopCount : collapseLoopCount; |
| } |
| |
| std::pair<uint64_t, bool> Fortran::lower::getCollapseSizeAndForce( |
| const Fortran::parser::AccClauseList &clauseList) { |
| uint64_t size = 1; |
| bool force = false; |
| for (const Fortran::parser::AccClause &clause : clauseList.v) { |
| if (const auto *collapseClause = |
| std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) { |
| const Fortran::parser::AccCollapseArg &arg = collapseClause->v; |
| force = std::get<bool>(arg.t); |
| const auto &collapseValue = |
| std::get<Fortran::parser::ScalarIntConstantExpr>(arg.t); |
| size = *Fortran::semantics::GetIntValue(collapseValue); |
| break; |
| } |
| } |
| return {size, force}; |
| } |
| |
/// Create an ACC loop operation for a DO construct when inside ACC compute
/// constructs. This serves as a bridge between regular DO construct handling
/// and ACC loop creation.
| mlir::Operation *Fortran::lower::genOpenACCLoopFromDoConstruct( |
| AbstractConverter &converter, |
| Fortran::semantics::SemanticsContext &semanticsContext, |
| Fortran::lower::SymMap &localSymbols, |
| const Fortran::parser::DoConstruct &doConstruct, pft::Evaluation &eval) { |
| if (!lowerDoLoopToAccLoop) |
| return nullptr; |
| |
| // Only convert loops which have induction variables that need privatized. |
| if (!doConstruct.IsDoNormal() && !doConstruct.IsDoConcurrent()) |
| return nullptr; |
| |
| // If the evaluation is unstructured, then we cannot convert the loop |
| // because acc loop does not have an unstructured form. |
| // TODO: There may be other strategies that can be employed such |
| // as generating acc.private for the loop variables without attaching |
| // them to acc.loop. |
| // For now - generate a not-yet-implemented message because without |
| // privatizing the induction variable, the loop may not execute correctly. |
| // Only do this for `acc kernels` because in `acc parallel`, scalars end |
| // up as implicitly firstprivate. |
| if (eval.lowerAsUnstructured()) { |
| if (mlir::isa_and_present<mlir::acc::KernelsOp>( |
| mlir::acc::getEnclosingComputeOp( |
| converter.getFirOpBuilder().getRegion()))) |
| TODO(converter.getCurrentLocation(), |
| "unstructured do loop in acc kernels"); |
| return nullptr; |
| } |
| |
| // Prepare empty operand vectors since there are no associated `acc loop` |
| // clauses with the Fortran do loops being handled here. |
| llvm::SmallVector<mlir::Value> privateOperands, gangOperands, |
| workerNumOperands, vectorOperands, tileOperands, cacheOperands, |
| reductionOperands; |
| llvm::SmallVector<mlir::Type> retTy; |
| AccDataMap dataMap; |
| mlir::Value yieldValue; |
| uint64_t loopsToProcess = 1; // Single loop construct |
| |
| // Use same mechanism that handles `acc loop` contained do loops to handle |
| // the implicit loop case. |
| Fortran::lower::StatementContext stmtCtx; |
| auto loopOp = buildACCLoopOp( |
| converter, converter.getCurrentLocation(), semanticsContext, stmtCtx, |
| doConstruct, eval, privateOperands, dataMap, gangOperands, |
| workerNumOperands, vectorOperands, tileOperands, cacheOperands, |
| reductionOperands, retTy, yieldValue, loopsToProcess); |
| |
| // Normal do loops which are not annotated with `acc loop` should be |
| // left for analysis by marking with `auto`. This is the case even in the case |
| // of `acc parallel` region because the normal rules of applying `independent` |
| // is only for loops marked with `acc loop`. |
| // For do concurrent loops, the spec says in section 2.17.2: |
| // "When do concurrent appears without a loop construct in a kernels construct |
| // it is treated as if it is annotated with loop auto. If it appears in a |
| // parallel construct or an accelerator routine then it is treated as if it is |
| // annotated with loop independent." |
| // So this means that in all cases we mark with `auto` unless it is a |
| // `do concurrent` in an `acc parallel` construct or it must be `seq` because |
| // it is in an `acc serial` construct. |
| fir::FirOpBuilder &builder = converter.getFirOpBuilder(); |
| mlir::Operation *accRegionOp = |
| mlir::acc::getEnclosingComputeOp(builder.getRegion()); |
| mlir::acc::LoopParMode parMode = |
| mlir::isa_and_present<mlir::acc::ParallelOp>(accRegionOp) && |
| doConstruct.IsDoConcurrent() |
| ? mlir::acc::LoopParMode::loop_independent |
| : mlir::isa_and_present<mlir::acc::SerialOp>(accRegionOp) |
| ? mlir::acc::LoopParMode::loop_seq |
| : mlir::acc::LoopParMode::loop_auto; |
| |
| // Set the parallel mode based on the computed parMode |
| auto deviceNoneAttr = mlir::acc::DeviceTypeAttr::get( |
| builder.getContext(), mlir::acc::DeviceType::None); |
| auto arrOfDeviceNone = |
| mlir::ArrayAttr::get(builder.getContext(), deviceNoneAttr); |
| if (parMode == mlir::acc::LoopParMode::loop_independent) { |
| loopOp.setIndependentAttr(arrOfDeviceNone); |
| } else if (parMode == mlir::acc::LoopParMode::loop_seq) { |
| loopOp.setSeqAttr(arrOfDeviceNone); |
| } else if (parMode == mlir::acc::LoopParMode::loop_auto) { |
| loopOp.setAuto_Attr(arrOfDeviceNone); |
| } else { |
| llvm_unreachable("Unexpected loop par mode"); |
| } |
| |
| return loopOp; |
| } |