diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index 1b19807bff0cf..19790a08bcade 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -401,6 +401,10 @@ class AbstractConverter { virtual mlir::StateStack &getStateStack() = 0; + virtual void + genPermutatedLoops(llvm::ArrayRef doStmts, + Fortran::lower::pft::Evaluation *innermostDo) = 0; + private: /// Options controlling lowering behavior. const Fortran::lower::LoweringOptions &loweringOptions; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index a8f405dd03d1c..7a47cd98ef4fd 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2385,6 +2385,126 @@ class FirConverter : public Fortran::lower::AbstractConverter { // so no clean-up needs to be generated for these entities. } + void + genPermutatedLoops(llvm::ArrayRef doStmts, + Fortran::lower::pft::Evaluation *innermostDo) override { + // Opens one loop per entry of doStmts (in that order), lowers the body of + // innermostDo once, then closes the loops again innermost-first. + + llvm::SmallVector headerBlocks; + llvm::SmallVector loopInfos; + + auto enterLoop = [&](Fortran::lower::pft::Evaluation &eval) { + bool unstructuredContext = eval.lowerAsUnstructured(); + + // Collect loop nest information. + // Generate begin loop code directly for infinite and while loops. + Fortran::lower::pft::Evaluation &doStmtEval = + eval.getFirstNestedEvaluation(); + auto *doStmt = doStmtEval.getIf(); + const auto &loopControl = + std::get>(doStmt->t); + mlir::Block *preheaderBlock = doStmtEval.block; + mlir::Block *beginBlock = + preheaderBlock ? preheaderBlock : builder->getBlock(); + auto createNextBeginBlock = [&]() { + // Step beginBlock through unstructured preheader, header, and mask + // blocks, created in outermost to innermost order. 
+ return beginBlock = beginBlock->splitBlock(beginBlock->end()); + }; + mlir::Block *headerBlock = + unstructuredContext ? createNextBeginBlock() : nullptr; + headerBlocks.push_back(headerBlock); + mlir::Block *bodyBlock = doStmtEval.lexicalSuccessor->block; + mlir::Block *exitBlock = doStmtEval.parentConstruct->constructExit->block; + IncrementLoopNestInfo &incrementLoopNestInfo = loopInfos.emplace_back(); + const Fortran::parser::ScalarLogicalExpr *whileCondition = nullptr; + bool infiniteLoop = !loopControl.has_value(); + if (infiniteLoop) { + assert(unstructuredContext && "infinite loop must be unstructured"); + startBlock(headerBlock); + } else if ((whileCondition = + std::get_if( + &loopControl->u))) { + assert(unstructuredContext && "while loop must be unstructured"); + maybeStartBlock(preheaderBlock); // no block or empty block + startBlock(headerBlock); + genConditionalBranch(*whileCondition, bodyBlock, exitBlock); + } else if (const auto *bounds = + std::get_if( + &loopControl->u)) { + // Non-concurrent increment loop. + IncrementLoopInfo &info = incrementLoopNestInfo.emplace_back( + *bounds->Name().thing.symbol, bounds->Lower(), bounds->Upper(), + bounds->Step()); + if (unstructuredContext) { + maybeStartBlock(preheaderBlock); + info.hasRealControl = info.loopVariableSym->GetType()->IsNumeric( + Fortran::common::TypeCategory::Real); + info.headerBlock = headerBlock; + info.bodyBlock = bodyBlock; + info.exitBlock = exitBlock; + } + } else { + llvm_unreachable("Cannot permute DO CONCURRENT"); + } + + // Increment loop begin code. (Infinite/while code was already generated.) 
+ if (!infiniteLoop && !whileCondition) + genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs); + }; + + auto leaveLoop = [&](Fortran::lower::pft::Evaluation &eval, + mlir::Block *headerBlock, + IncrementLoopNestInfo &incrementLoopNestInfo) { + bool unstructuredContext = eval.lowerAsUnstructured(); + + Fortran::lower::pft::Evaluation &doStmtEval = + eval.getFirstNestedEvaluation(); + auto *doStmt = doStmtEval.getIf(); + + const auto &loopControl = + std::get>(doStmt->t); + bool infiniteLoop = !loopControl.has_value(); + // An infinite DO has no loop control, so only inspect it when present. + const Fortran::parser::ScalarLogicalExpr *whileCondition = + infiniteLoop ? nullptr + : std::get_if<Fortran::parser::ScalarLogicalExpr>(&loopControl->u); + + auto iter = std::prev(eval.getNestedEvaluations().end()); + + // An EndDoStmt in unstructured code may start a new block. + Fortran::lower::pft::Evaluation &endDoEval = *iter; + assert(endDoEval.getIf() && "no enddo stmt"); + if (unstructuredContext) + maybeStartBlock(endDoEval.block); + + // Loop end code. + if (infiniteLoop || whileCondition) + genBranch(headerBlock); + else + genFIRIncrementLoopEnd(incrementLoopNestInfo); + + // This call may generate a branch in some contexts. + genFIR(endDoEval, unstructuredContext); + }; + + for (auto l : doStmts) + enterLoop(*l); + + // Loop body code. 
+ bool innermostUnstructuredContext = innermostDo->lowerAsUnstructured(); + + auto iter = innermostDo->getNestedEvaluations().begin(); + for (auto end = --innermostDo->getNestedEvaluations().end(); iter != end; + ++iter) + genFIR(*iter, innermostUnstructuredContext); + + // Close the loops in reverse order of opening: the innermost loop ends first. + for (std::size_t i = doStmts.size(); i-- > 0;) + leaveLoop(*doStmts[i], headerBlocks[i], loopInfos[i]); + } + void attachInlineAttributes( mlir::Operation &op, const llvm::ArrayRef &dirs) { diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp index 9bfbf67bec88c..bf09bed395285 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.cpp +++ b/flang/lib/Lower/OpenMP/Decomposer.cpp @@ -98,14 +98,39 @@ ConstructQueue buildConstructQueue( return decompose.output; } +// from clang +static bool isOpenMPLoopTransformationDirective(llvm::omp::Directive DKind) { + return DKind == llvm::omp::Directive::OMPD_tile || + DKind == llvm::omp::Directive::OMPD_unroll || + DKind == llvm::omp::Directive::OMPD_reverse || + DKind == llvm::omp::Directive::OMPD_interchange || + DKind == llvm::omp::Directive::OMPD_stripe; +} + +llvm::iterator_range getNonTransformQueue( + llvm::iterator_range range) { + // remove trailing loop transformations + auto b = range.begin(); + auto e = range.end(); + while (e != b) { + auto e2 = e - 1; + if (!isOpenMPLoopTransformationDirective(e2->id)) + break; + e = e2; + } + + return llvm::make_range(b, e); +} + bool matchLeafSequence(ConstructQueue::const_iterator item, const ConstructQueue &queue, llvm::omp::Directive directive) { llvm::ArrayRef leafDirs = llvm::omp::getLeafConstructsOrSelf(directive); - for (auto [dir, leaf] : - llvm::zip_longest(leafDirs, llvm::make_range(item, queue.end()))) { + for (auto [dir, leaf] : llvm::zip_longest( + leafDirs, + getNonTransformQueue(llvm::make_range(item, queue.end())))) { if (!dir.has_value() || !leaf.has_value()) return false; diff --git 
a/flang/lib/Lower/OpenMP/Decomposer.h 
b/flang/lib/Lower/OpenMP/Decomposer.h index 65492bd76280d..f057009629efc 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.h +++ b/flang/lib/Lower/OpenMP/Decomposer.h @@ -57,6 +57,10 @@ bool isLastItemInQueue(ConstructQueue::const_iterator item, bool matchLeafSequence(ConstructQueue::const_iterator item, const ConstructQueue &queue, llvm::omp::Directive directive); + +llvm::iterator_range getNonTransformQueue( + llvm::iterator_range range); + } // namespace Fortran::lower::omp #endif // FORTRAN_LOWER_OPENMP_DECOMPOSER_H diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index e2018add11206..b5f61222eee20 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -567,6 +567,37 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, static lower::pft::Evaluation * getCollapsedLoopEval(lower::pft::Evaluation &eval, int collapseValue) { + + const parser::OpenMPConstruct *ompCons = + eval.getIf(); + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + const parser::OpenMPLoopConstruct *innerConstruct = + ompLoop->GetNestedConstruct(); + + int permutationLengthValue = 0; + if (innerConstruct) { + const auto &innerLoopDirective = *innerConstruct; + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + Fortran::parser::omp::GetOmpDirectiveName(innerBegin).v; + + if (innerDirective == llvm::omp::Directive::OMPD_interchange) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{innerBegin.Clauses()}; + for (const auto &clause : innerClauseList.v) { + if (const auto tclause{ + std::get_if(&clause.u)}) { + permutationLengthValue = tclause->v.size(); + } + } + // default: permution(2,1) + if (permutationLengthValue == 0) + permutationLengthValue = 2; + } + } + } + // Return the Evaluation of the innermost collapsed loop, or the current one // if there was no COLLAPSE. 
if (collapseValue == 0) @@ -1231,8 +1262,11 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, } if (!info.genSkeletonOnly) { + // Transforms already processed by getLoopNestOp + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); if (ConstructQueue::const_iterator next = std::next(item); - next != queue.end()) { + next != transforms.begin() && next != queue.end()) { genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval, info.loc, queue, next); } else { @@ -1551,7 +1585,8 @@ genLoopNestClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const List &clauses, mlir::Location loc, mlir::omp::LoopNestOperands &clauseOps, - llvm::SmallVectorImpl &iv) { + llvm::SmallVectorImpl &iv, + bool enableInterchange = false) { ClauseProcessor cp(converter, semaCtx, clauses); HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter); @@ -1559,6 +1594,28 @@ genLoopNestClauses(lower::AbstractConverter &converter, cp.processCollapse(loc, eval, clauseOps, clauseOps, iv); clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); + + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + for (auto &clause : clauses) { + if (clause.id == llvm::omp::Clause::OMPC_collapse) { + const auto &collapse = std::get(clause.u); + int64_t collapseValue = evaluate::ToInt64(collapse.v).value(); + clauseOps.collapseNumLoops = + firOpBuilder.getI64IntegerAttr(collapseValue); + } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { + // This case handles the stand-alone tiling construct + const auto &sizes = std::get(clause.u); + llvm::SmallVector sizeValues; + for (auto &size : sizes.v) { + int64_t sizeValue = evaluate::ToInt64(size).value(); + sizeValues.push_back(sizeValue); + } + clauseOps.tileSizes = sizeValues; + } else if (clause.id == llvm::omp::Clause::OMPC_permutation) { + 
llvm_unreachable("MK: To handle standalone interchange construct"); + } + } + cp.processTileSizes(eval, clauseOps); } @@ -1987,7 +2044,9 @@ static mlir::omp::LoopNestOp genLoopNestOp( llvm::ArrayRef< std::pair> wrapperArgs, - llvm::omp::Directive directive, DataSharingProcessor &dsp) { + llvm::omp::Directive directive, DataSharingProcessor &dsp, + std::optional> + transforms = std::nullopt) { auto ivCallback = [&](mlir::Operation *op) { genLoopVars(op, converter, loc, iv, wrapperArgs); return llvm::SmallVector(iv); @@ -1996,6 +2055,65 @@ static mlir::omp::LoopNestOp genLoopNestOp( uint64_t nestValue = getCollapseValue(item->clauses); nestValue = nestValue < iv.size() ? iv.size() : nestValue; auto *nestedEval = getCollapsedLoopEval(eval, nestValue); + + if (!transforms.has_value()) { + // This must be a standalone construct, assume all following actions are + // transformations + transforms = llvm::make_range(std::next(item), queue.end()); + } + + for (auto &&transform : llvm::reverse(*transforms)) { + auto d = transform.id; + auto clauses = transform.clauses; + + switch (d) { + case llvm::omp::OMPD_interchange: { + llvm::SmallVector permutation; + + auto &&permutationClause = ClauseFinder::findUniqueClause< + Fortran::lower::omp::clause::Permutation>(clauses); + if (permutationClause) { + permutation.reserve(permutationClause->v.size()); + for (auto &&ts : permutationClause->v) { + permutation.push_back(evaluate::ToInt64(ts).value()); + } + // llvm::append_range( permutation, permutationClause->v); + + } else { + permutation = {2, 1}; + } + + assert(permutation.size() == iv.size() && + "TODO: if permutation is smaller than number of associated loops, " + "permute only the first loops"); + llvm::SmallVector newIVs; + llvm::SmallVector newLBs; + llvm::SmallVector newUBs; + llvm::SmallVector newINCs; + llvm::SmallVector newSizes; + + // TODO: Assert this is a valid permution + for (auto perm : permutation) { + newIVs.push_back(iv[perm - 1]); + 
newLBs.push_back(clauseOps.loopLowerBounds[perm - 1]); + newUBs.push_back(clauseOps.loopUpperBounds[perm - 1]); + newINCs.push_back(clauseOps.loopSteps[perm - 1]); + if (!clauseOps.tileSizes.empty()) + newSizes.push_back(clauseOps.tileSizes[perm - 1]); + } + + iv = newIVs; + clauseOps.loopLowerBounds = newLBs; + clauseOps.loopUpperBounds = newUBs; + clauseOps.loopSteps = newINCs; + clauseOps.tileSizes = newSizes; + + } break; + default: + llvm_unreachable("MK: loop transformation not yet implemented"); + } + } + return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, directive) @@ -2368,6 +2486,73 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); } +static void +collectLoops(lower::pft::Evaluation &eval, + llvm::SmallVectorImpl &result, + int numLoops) { + lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); + for ([[maybe_unused]] auto i : llvm::seq(numLoops)) { + lower::pft::Evaluation *doLoop = + &doConstructEval->getFirstNestedEvaluation(); + auto *doStmt = doLoop->getIf(); + assert(doStmt && "Expected do loop to be in the nested evaluation"); + const auto &loopControl = + std::get>(doStmt->t); + // A DO without loop control has an empty optional; do not dereference it. + [[maybe_unused]] const parser::LoopControl::Bounds *bounds = + loopControl.has_value() + ? 
std::get_if<parser::LoopControl::Bounds>(&loopControl->u) : nullptr; + assert(bounds && "Expected bounds for worksharing do loop"); + + result.push_back(doConstructEval); + + doConstructEval = + &*std::next(doConstructEval->getNestedEvaluations().begin()); + } +} + +static void genStandaloneInterchangeOp( + Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, lower::StatementContext &stmtCtx, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location loc, + const ConstructQueue &queue, ConstructQueue::const_iterator item) { + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = 
llvm::make_range(q.end(), queue.end()); + assert(llvm::range_size(transforms) == 1 && + "standalone interchange expects exactly one transformation"); + auto &&transform = *transforms.begin(); + assert(transform.id == llvm::omp::OMPD_interchange && + "expected an interchange directive"); + const auto &clauses = transform.clauses; + + llvm::SmallVector permutation; + auto &&permutationClause = + ClauseFinder::findUniqueClause( + clauses); + if (permutationClause) { + permutation.reserve(permutationClause->v.size()); + for (auto &&ts : permutationClause->v) { + permutation.push_back(evaluate::ToInt64(ts).value()); + } + } else { + permutation = {2, 1}; + } + + llvm::SmallVector loops; + collectLoops(eval, loops, permutation.size()); + + // TODO: Also assert that the permutation contains no duplicate entries + llvm::SmallVector newLoops; + for (auto perm : permutation) { + assert(perm >= 1 && static_cast<std::size_t>(perm) <= loops.size() && + "permutation entry out of range"); + newLoops.push_back(loops[perm - 1]); + } + + converter.genPermutatedLoops(newLoops, loops.back()); +} + static mlir::omp::MaskedOp genMaskedOp(lower::AbstractConverter &converter, lower::SymMap &symTable, lower::StatementContext &stmtCtx, @@ -3274,7 +3454,10 @@ static mlir::omp::DistributeOp genCompositeDistributeParallelDo( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 3 && "Invalid leaf constructs"); ConstructQueue::const_iterator distributeItem = item; ConstructQueue::const_iterator parallelItem = std::next(distributeItem); ConstructQueue::const_iterator doItem = std::next(parallelItem); @@ -3328,10 +3511,10 @@ static 
mlir::omp::DistributeOp genCompositeDistributeParallelDo( converter, loc, wsloopClauseOps, wsloopArgs); wsloopOp.setComposite(/*val=*/true); - genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, doItem, - loopNestClauseOps, iv, - 
{{distributeOp, distributeArgs}, {wsloopOp, wsloopArgs}}, - llvm::omp::Directive::OMPD_distribute_parallel_do, dsp); + genLoopNestOp( + converter, symTable, semaCtx, eval, loc, queue, doItem, loopNestClauseOps, + iv, {{distributeOp, distributeArgs}, {wsloopOp, wsloopArgs}}, + llvm::omp::Directive::OMPD_distribute_parallel_do, dsp, transforms); return distributeOp; } @@ -3340,7 +3523,11 @@ static mlir::omp::DistributeOp genCompositeDistributeParallelDoSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 4 && "Invalid leaf constructs"); + ConstructQueue::const_iterator distributeItem = item; ConstructQueue::const_iterator parallelItem = std::next(distributeItem); ConstructQueue::const_iterator doItem = std::next(parallelItem); @@ -3423,7 +3610,7 @@ static mlir::omp::DistributeOp genCompositeDistributeParallelDoSimd( {wsloopOp, wsloopArgs}, {simdOp, simdArgs}}, llvm::omp::Directive::OMPD_distribute_parallel_do_simd, - simdItemDSP); + simdItemDSP, transforms); return distributeOp; } @@ -3432,7 +3619,11 @@ static mlir::omp::DistributeOp genCompositeDistributeSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 2 && "Invalid leaf constructs"); + ConstructQueue::const_iterator distributeItem = item; 
ConstructQueue::const_iterator simdItem = std::next(distributeItem); @@ -3485,7 +3676,8 @@ static mlir::omp::DistributeOp genCompositeDistributeSimd( genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem, loopNestClauseOps, iv, {{distributeOp, distributeArgs}, {simdOp, simdArgs}}, - llvm::omp::Directive::OMPD_distribute_simd, simdItemDSP); + llvm::omp::Directive::OMPD_distribute_simd, simdItemDSP, + transforms); return distributeOp; } @@ -3494,7 +3686,11 @@ static mlir::omp::WsloopOp genCompositeDoSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 2 && "Invalid leaf constructs"); + ConstructQueue::const_iterator doItem = item; ConstructQueue::const_iterator simdItem = std::next(doItem); @@ -3550,7 +3746,7 @@ static mlir::omp::WsloopOp genCompositeDoSimd( genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem, loopNestClauseOps, iv, {{wsloopOp, wsloopArgs}, {simdOp, simdArgs}}, - llvm::omp::Directive::OMPD_do_simd, simdItemDSP); + llvm::omp::Directive::OMPD_do_simd, simdItemDSP, transforms); return wsloopOp; } @@ -3559,7 +3755,10 @@ static mlir::omp::TaskloopOp genCompositeTaskloopSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + + assert(llvm::range_size(q) == 2 && "Invalid leaf constructs"); + if (!semaCtx.langOptions().OpenMPSimd) TODO(loc, "Composite TASKLOOP 
SIMD"); return nullptr; @@ -3741,6 +3940,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; + case llvm::omp::Directive::OMPD_interchange: + genStandaloneInterchangeOp(converter, symTable, stmtCtx, semaCtx, eval, loc, + queue, item); + break; case llvm::omp::Directive::OMPD_workdistribute: newOp = genWorkdistributeOp(converter, symTable, semaCtx, eval, loc, queue, item); @@ -4399,16 +4602,33 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, mlir::Location currentLocation = converter.genLocation(beginSpec.source); + const parser::OmpDirectiveName &beginName = beginSpec.DirName(); + ConstructQueue queue{ + buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, + eval, beginName.source, beginName.v, clauses)}; + for (auto &construct : std::get(loopConstruct.t)) { if (const parser::OpenMPLoopConstruct *ompNestedLoopCons = parser::omp::GetOmpLoop(construct)) { llvm::omp::Directive nestedDirective = parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; + List nestedClauses = + makeClauses(ompNestedLoopCons->BeginDir().Clauses(), semaCtx); switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: // Skip OMPD_tile since the tile sizes will be retrieved when // generating the omp.loop_nest op. break; + case llvm::omp::Directive::OMPD_interchange: { + // MK: add the loop transformation to the end of the queue (i.e. 
applied + // first) + ConstructQueue nestedQueue{buildConstructQueue( + converter.getFirOpBuilder().getModule(), semaCtx, eval, + beginName.source, nestedDirective, nestedClauses)}; + for (auto nl : nestedQueue) { + queue.push_back(nl); + } + } break; default: { unsigned version = semaCtx.langOptions().OpenMPVersion; TODO(currentLocation, @@ -4420,10 +4640,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, } } - const parser::OmpDirectiveName &beginName = beginSpec.DirName(); - ConstructQueue queue{ - buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, - eval, beginName.source, beginName.v, clauses)}; genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, queue.begin()); } diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index e9ba5f386803a..6208eab88764e 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -16,6 +16,7 @@ #include "flang/Evaluate/fold.h" #include "flang/Evaluate/tools.h" #include +#include #include #include #include @@ -35,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -798,24 +801,62 @@ static void processTileSizesFromOpenMPConstruct( } } +static bool processInterchangePermutationFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + std::function processFun) { + if (!ompCons) + return false; + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + if (auto *innerConstruct = ompLoop->GetNestedConstruct()) { + const parser::OmpDirectiveSpecification &innerBeginSpec = + innerConstruct->BeginDir(); + if (innerBeginSpec.DirId() == llvm::omp::Directive::OMPD_interchange) { + // Get the size values from parse tree and convert to a vector. 
+ for (const auto &clause : innerBeginSpec.Clauses().v) { + if (const auto tclause{ + std::get_if(&clause.u)}) { + processFun(tclause); + break; + } + } + return true; + } + } + } + return false; +} + pft::Evaluation *getNestedDoConstruct(pft::Evaluation &eval) { - for (pft::Evaluation &nested : eval.getNestedEvaluations()) { - // In an OpenMPConstruct there can be compiler directives: - // 1 <> - // 2 CompilerDirective: !unroll - // <> -> 8 - if (nested.getIf()) - continue; - // Within a DoConstruct, there can be compiler directives, plus - // there is a DoStmt before the body: - // <> -> 8 - // 3 NonLabelDoStmt -> 7: do i = 1, n - // <> -> 7 - if (nested.getIf()) - continue; - assert(nested.getIf() && - "Unexpected construct in the nested evaluations"); - return &nested; + pft::Evaluation *curEval = &eval; + while (true) { + for (pft::Evaluation &nested : curEval->getNestedEvaluations()) { + // In an OpenMPConstruct there can be compiler directives: + // 1 <> + // 2 CompilerDirective: !unroll + // <> -> 8 + if (nested.getIf()) + continue; + // Within a DoConstruct, there can be compiler directives, plus + // there is a DoStmt before the body: + // <> -> 8 + // 3 NonLabelDoStmt -> 7: do i = 1, n + // <> -> 7 + if (nested.getIf()) + continue; + + if (nested.getIf()) + return &nested; + + // Follow innermost loop construct + if (auto &&ompCons = nested.getIf()) { + auto &&u = ompCons->u; + auto &&name = parser::omp::GetOmpDirectiveName(u); + curEval = &nested; + break; + } + + llvm_unreachable("Expected do loop to be in the nested evaluations"); + } } llvm_unreachable("Expected do loop to be in the nested evaluations"); } @@ -876,14 +917,26 @@ void collectLoopRelatedInfo( // Collect sizes from tile directive if present. 
std::int64_t sizesLengthValue = 0l; + std::int64_t permutationLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { processTileSizesFromOpenMPConstruct( ompCons, [&](const parser::OmpClause::Sizes *tclause) { sizesLengthValue = tclause->v.size(); }); + + if (processInterchangePermutationFromOpenMPConstruct( + ompCons, [&](const parser::OmpClause::Permutation *tclause) { + permutationLengthValue = tclause->v.size(); + })) { + if (permutationLengthValue == 0) { + // default: permution(2,1) + permutationLengthValue = 2; + } + } } - std::int64_t collapseValue = std::max(numCollapse, sizesLengthValue); + std::int64_t collapseValue = + std::max({numCollapse, sizesLengthValue, permutationLengthValue}); std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = @@ -917,6 +970,40 @@ void collectLoopRelatedInfo( convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); } +void collectPermutationFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &permutation, + Fortran::semantics::SemanticsContext &semaCtx) { + if (!ompCons) + return; + + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + const parser::OpenMPLoopConstruct *innerConstruct = + ompLoop->GetNestedConstruct(); + + if (innerConstruct) { + const auto &innerLoopDirective = *innerConstruct; + const auto &innerBegin = innerLoopDirective.BeginDir(); + const auto &innerDirective = + Fortran::parser::omp::GetOmpDirectiveName(innerLoopDirective).v; + + if (innerDirective == llvm::omp::Directive::OMPD_interchange) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{innerBegin.Clauses()}; + // std::get(innerBegin.t)}; + for (const auto &clause : innerClauseList.v) + if (const auto tclause{ + std::get_if(&clause.u)}) { + for (auto &tval : tclause->v) { + if (const auto v{EvaluateInt64(semaCtx, tval)}) + permutation.push_back(*v); + } + } + } + } + } +} + } // namespace omp } // namespace lower } // namespace Fortran diff 
--git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index f707557197847..c935308bc7a3d 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -189,6 +189,11 @@ void collectTileSizesFromOpenMPConstruct( llvm::SmallVectorImpl &tileSizes, Fortran::semantics::SemanticsContext &semaCtx); +void collectPermutationFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &permuation, + Fortran::semantics::SemanticsContext &semaCtx); + } // namespace omp } // namespace lower } // namespace Fortran diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 7838173d791a1..4311640233ea5 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -2487,6 +2487,7 @@ static constexpr DirectiveSet GetLoopDirectives() { unsigned(Directive::OMPD_fuse), unsigned(Directive::OMPD_tile), unsigned(Directive::OMPD_unroll), + unsigned(Directive::OMPD_interchange), }; return loopDirectives; } diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp index 91b306a2a5ebd..ec48d26046a6e 100644 --- a/flang/lib/Semantics/check-omp-loop.cpp +++ b/flang/lib/Semantics/check-omp-loop.cpp @@ -233,7 +233,8 @@ void OmpStructureChecker::CheckSIMDNest(const parser::OpenMPConstruct &c) { const auto &beginName{c.BeginDir().DirName()}; if (beginName.v == llvm::omp::Directive::OMPD_simd || beginName.v == llvm::omp::Directive::OMPD_do_simd || - beginName.v == llvm::omp::Directive::OMPD_loop) { + beginName.v == llvm::omp::Directive::OMPD_loop || + IsLoopTransforming(beginName.v)) { eligibleSIMD = true; } }, diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 7e11be509cd02..48e664900a845 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -1028,8 +1028,8 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { void 
CollectNumAffectedLoopsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); - void CollectNumAffectedLoopsFromClauses(const parser::OmpClauseList &, - llvm::SmallVector &, + void CollectNumAffectedLoopsFromClauses(const parser::OpenMPLoopConstruct &x, + const parser::OmpClauseList &, llvm::SmallVector &, llvm::SmallVector &); Symbol::Flags dataSharingAttributeFlags{Symbol::Flag::OmpShared, @@ -2127,7 +2127,7 @@ bool OmpAttributeVisitor::Pre(const parser::DoConstruct &x) { } static bool isSizesClause(const parser::OmpClause *clause) { - return std::holds_alternative(clause->u); + return clause && std::holds_alternative(clause->u); } std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( @@ -2178,7 +2178,7 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( llvm::SmallVector &clauses) { const auto &clauseList{x.BeginDir().Clauses()}; - CollectNumAffectedLoopsFromClauses(clauseList, levels, clauses); + CollectNumAffectedLoopsFromClauses(x, clauseList, levels, clauses); CollectNumAffectedLoopsFromInnerLoopContruct(x, levels, clauses); } @@ -2195,8 +2195,13 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct( } void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( - const parser::OmpClauseList &x, llvm::SmallVector &levels, + const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &x, + llvm::SmallVector &levels, llvm::SmallVector &clauses) { + const auto &beginLoopDir{y.BeginDir()}; + const auto &dirClauses{beginLoopDir.Clauses()}; + auto ytv = Fortran::parser::omp::GetOmpDirectiveName(y).v; + for (const auto &clause : x.v) { if (const auto oclause{ std::get_if(&clause.u)}) { @@ -2223,6 +2228,20 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( clauses.push_back(&clause); } } + + if (ytv == llvm::omp::OMPD_interchange) { + for (const auto &clause : dirClauses.v) { + if (const auto tclause{ + std::get_if(&clause.u)}) { + 
levels.push_back(tclause->v.size()); + clauses.push_back(&clause); + return; + } + } + + levels.push_back(2); + clauses.push_back(nullptr); + } } void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop( diff --git a/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..b5ef5214f5064 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-intdo.f90 @@ -0,0 +1,37 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TEAMS NUM_TEAMS(1) + + !$OMP DISTRIBUTE PARALLEL DO SCHEDULE(static,2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DISTRIBUTE PARALLEL DO + + !$OMP END TEAMS + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-simd-intdo.f90 new file mode 100644 index 0000000000000..2a192cad017a6 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-simd-intdo.f90 @@ -0,0 +1,37 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TEAMS NUM_TEAMS(1) + + !$OMP DISTRIBUTE PARALLEL DO SIMD SCHEDULE(static,2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DISTRIBUTE PARALLEL DO SIMD + + !$OMP END TEAMS + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/distribute-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/distribute-simd-intdo.f90 new file mode 100644 index 0000000000000..fce62b7f3ccda --- /dev/null +++ b/openmp/runtime/test/transform/interchange/distribute-simd-intdo.f90 @@ -0,0 +1,37 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TEAMS NUM_TEAMS(1) + + !$OMP DISTRIBUTE SIMD + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DISTRIBUTE SIMD + + !$OMP END TEAMS + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! 
CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/intdo-permutation.f90 b/openmp/runtime/test/transform/interchange/intdo-permutation.f90 new file mode 100644 index 0000000000000..a8a8e7f35d018 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/intdo-permutation.f90 @@ -0,0 +1,42 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_intdo + integer :: i, j + print *, 'do' + + !$OMP INTERCHANGE PERMUTATION(2,3,1) + do i = 7, 15, 3 + do j = -1, 1, 2 + do k = 3, 1, -1 + print '("i=", I0, " j=", I0, " k=", I0)', i, j, k + end do + end do + end do + !$OMP END INTERCHANGE + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 k=3 +! CHECK-NEXT: i=10 j=-1 k=3 +! CHECK-NEXT: i=13 j=-1 k=3 +! CHECK-NEXT: i=7 j=-1 k=2 +! CHECK-NEXT: i=10 j=-1 k=2 +! CHECK-NEXT: i=13 j=-1 k=2 +! CHECK-NEXT: i=7 j=-1 k=1 +! CHECK-NEXT: i=10 j=-1 k=1 +! CHECK-NEXT: i=13 j=-1 k=1 +! CHECK-NEXT: i=7 j=1 k=3 +! CHECK-NEXT: i=10 j=1 k=3 +! CHECK-NEXT: i=13 j=1 k=3 +! CHECK-NEXT: i=7 j=1 k=2 +! CHECK-NEXT: i=10 j=1 k=2 +! CHECK-NEXT: i=13 j=1 k=2 +! CHECK-NEXT: i=7 j=1 k=1 +! CHECK-NEXT: i=10 j=1 k=1 +! CHECK-NEXT: i=13 j=1 k=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/intdo.f90 b/openmp/runtime/test/transform/interchange/intdo.f90 new file mode 100644 index 0000000000000..fe6820f41dba6 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/intdo.f90 @@ -0,0 +1,31 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_intdo + integer :: i, j + print *, 'do' + + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! 
CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.f90 new file mode 100644 index 0000000000000..498534374ea30 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_collapse_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO COLLAPSE(2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.o b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.o new file mode 100644 index 0000000000000..a0abcfdf74fda Binary files /dev/null and b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.o differ diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-collapse.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-collapse.f90 new file mode 100644 index 0000000000000..4285edaa775b8 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-collapse.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO COLLAPSE(2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-firstprivate.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-firstprivate.f90 new file mode 100644 index 0000000000000..e53bb107bad2b --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-firstprivate.f90 @@ -0,0 +1,35 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + k = 1 + !$OMP PARALLEL DO NUM_THREADS(3) FIRSTPRIVATE(k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = k + 1 + print '("i=", I0, " j=", I0, " k=", I0)', i, j, k + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-DAG: i=7 j=-1 k=2 +! CHECK-DAG: i=10 j=-1 k=3 +! CHECK-DAG: i=13 j=-1 k=4 +! CHECK-DAG: i=7 j=0 k=2 +! CHECK-DAG: i=10 j=0 k=3 +! CHECK-DAG: i=13 j=0 k=4 +! CHECK-DAG: i=7 j=1 k=2 +! CHECK-DAG: i=10 j=1 k=3 +! CHECK-DAG: i=13 j=1 k=4 +! 
CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-lastprivate.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-lastprivate.f90 new file mode 100644 index 0000000000000..42d7032bd2184 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-lastprivate.f90 @@ -0,0 +1,28 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines +! XFAIL: * + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + + !$OMP PARALLEL DO LASTPRIVATE(k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = i*10 + j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' + print '("k=", I0)', k +end program + + +! CHECK: do +! CHECK-NEXT: done +! CHECK-NEXT: k=131 diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-permutation.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-permutation.f90 new file mode 100644 index 0000000000000..e52389f2448e4 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-permutation.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) + !$OMP INTERCHANGE PERMUTATION(2,1) + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! 
CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-i.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-i.f90 new file mode 100644 index 0000000000000..76928ce93577e --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-i.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) PRIVATE(i) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-j.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-j.f90 new file mode 100644 index 0000000000000..a679c921e9660 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-j.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) PRIVATE(j) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! 
CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private.f90 new file mode 100644 index 0000000000000..372ff573a10d2 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private.f90 @@ -0,0 +1,34 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(4) PRIVATE(k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = i + j + print '("i=", I0, " j=", I0, " k=", I0)', i, j, k + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-DAG: i=7 j=-1 k=6 +! CHECK-DAG: i=10 j=-1 k=9 +! CHECK-DAG: i=13 j=-1 k=12 +! CHECK-DAG: i=7 j=0 k=7 +! CHECK-DAG: i=10 j=0 k=10 +! CHECK-DAG: i=13 j=0 k=13 +! CHECK-DAG: i=7 j=1 k=8 +! CHECK-DAG: i=10 j=1 k=11 +! CHECK-DAG: i=13 j=1 k=14 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-reduction.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-reduction.f90 new file mode 100644 index 0000000000000..8d313becef862 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-reduction.f90 @@ -0,0 +1,27 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + k = 1 + !$OMP PARALLEL DO REDUCTION(+:k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = k + 1 + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' + print '("k=", I0)', k +end program + + +! CHECK: do +! CHECK-NEXT: done +! 
CHECK-NEXT: k=10 diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..cfa3bddf5c8d5 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/target-teams-distribute-parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/target-teams-distribute-parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..81e681b55eb1d --- /dev/null +++ b/openmp/runtime/test/transform/interchange/target-teams-distribute-parallel-wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program target_teams_distribute_parallel_do + integer :: i, j + print *, 'do' + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO SCHEDULE(static,2) NUM_TEAMS(1) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! 
CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 b/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 new file mode 100644 index 0000000000000..80e0ee62e020b --- /dev/null +++ b/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 @@ -0,0 +1,33 @@ +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TASKLOOP + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TASKLOOP + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 new file mode 100644 index 0000000000000..d84be9d1d7a96 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 @@ -0,0 +1,34 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines +! XFAIL: * + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TASKLOOP SIMD + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TASKLOOP SIMD + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! 
CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/teams-distribute-parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/teams-distribute-parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..6d7fe1afdcdd5 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/teams-distribute-parallel-wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program teams_distribute_parallel_do + integer :: i, j + print *, 'do' + + !$OMP TEAMS DISTRIBUTE PARALLEL DO SCHEDULE(static,2) NUM_TEAMS(1) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TEAMS DISTRIBUTE PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/wsloop-intdo.f90 new file mode 100644 index 0000000000000..32b1b87a9e859 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP DO SCHEDULE(static,2) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! 
CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/wsloop-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/wsloop-simd-intdo.f90 new file mode 100644 index 0000000000000..56ed14b165fa3 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/wsloop-simd-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP DO SIMD SCHEDULE(static,2) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DO SIMD + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/tile/intdo.f90 b/openmp/runtime/test/transform/tile/intdo.f90 new file mode 100644 index 0000000000000..27cd383a69fea --- /dev/null +++ b/openmp/runtime/test/transform/tile/intdo.f90 @@ -0,0 +1,57 @@ +! This test checks lowering of OpenMP tile directive + +! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + + +program tile_intdo + integer i, j + print *, 'do' + + !$OMP TILE SIZES(2,2) + do i=7, 18, 3 + do j=7, 19, 3 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END TILE + + print *, 'done' +end program + + +! CHECK: do + +! Complete tile +! CHECK-NEXT: i=7 j=7 +! CHECK-NEXT: i=7 j=10 +! CHECK-NEXT: i=10 j=7 +! CHECK-NEXT: i=10 j=10 + +! Complete tile +! CHECK-NEXT: i=7 j=13 +! CHECK-NEXT: i=7 j=16 +! CHECK-NEXT: i=10 j=13 +! CHECK-NEXT: i=10 j=16 + +! 
Partial tile +! CHECK-NEXT: i=7 j=19 +! CHECK-NEXT: i=10 j=19 + +! Complete tile +! CHECK-NEXT: i=13 j=7 +! CHECK-NEXT: i=13 j=10 +! CHECK-NEXT: i=16 j=7 +! CHECK-NEXT: i=16 j=10 + +! Complete tile +! CHECK-NEXT: i=13 j=13 +! CHECK-NEXT: i=13 j=16 +! CHECK-NEXT: i=16 j=13 +! CHECK-NEXT: i=16 j=16 + +! Partial tile +! CHECK-NEXT: i=13 j=19 +! CHECK-NEXT: i=16 j=19 + +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intdo.f90 b/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intdo.f90 new file mode 100644 index 0000000000000..66bddf30e045a --- /dev/null +++ b/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intdo.f90 @@ -0,0 +1,59 @@ +! This test checks lowering of OpenMP tile directive + +! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + + +program tile_wsloop_collapse_intdo + integer i, j + print *, 'do' + + !$OMP PARALLEL DO COLLAPSE(2) NUM_THREADS(1) + !$OMP TILE SIZES(2,2) + do i=7, 18, 3 + do j=7, 19, 3 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END TILE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do + +! Complete tile +! CHECK-NEXT: i=7 j=7 +! CHECK-NEXT: i=7 j=10 +! CHECK-NEXT: i=10 j=7 +! CHECK-NEXT: i=10 j=10 + +! Complete tile +! CHECK-NEXT: i=7 j=13 +! CHECK-NEXT: i=7 j=16 +! CHECK-NEXT: i=10 j=13 +! CHECK-NEXT: i=10 j=16 + +! Partial tile +! CHECK-NEXT: i=7 j=19 +! CHECK-NEXT: i=10 j=19 + +! Complete tile +! CHECK-NEXT: i=13 j=7 +! CHECK-NEXT: i=13 j=10 +! CHECK-NEXT: i=16 j=7 +! CHECK-NEXT: i=16 j=10 + +! Complete tile +! CHECK-NEXT: i=13 j=13 +! CHECK-NEXT: i=13 j=16 +! CHECK-NEXT: i=16 j=13 +! CHECK-NEXT: i=16 j=16 + +! Partial tile +! CHECK-NEXT: i=13 j=19 +! CHECK-NEXT: i=16 j=19 + +! CHECK-NEXT: done