From 14d7115fdf75fd1cdbab38e9810976794ebf9eba Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 14 Aug 2025 12:05:43 +0200 Subject: [PATCH 01/64] [flang][OpenMP] MSVC buildbot fix PR #153488 caused the msvc build (https://lab.llvm.org/buildbot/#/builders/166/builds/1397) to fail: ``` ..\llvm-project\flang\include\flang/Evaluate/rewrite.h(78): error C2668: 'Fortran::evaluate::rewrite::Identity::operator ()': ambiguous call to overloaded function ..\llvm-project\flang\include\flang/Evaluate/rewrite.h(43): note: could be 'Fortran::evaluate::Expr Fortran::evaluate::rewrite::Identity::operator ()(Fortran::evaluate::Expr &&,const U &)' with [ S=Fortran::evaluate::value::Integer<128,true,32,unsigned int,unsigned __int64,128>, U=Fortran::evaluate::value::Integer<128,true,32,unsigned int,unsigned __int64,128> ] ..\llvm-project\flang\lib\Semantics\check-omp-atomic.cpp(174): note: or 'Fortran::evaluate::Expr Fortran::semantics::ReassocRewriter::operator ()(Fortran::evaluate::Expr &&,const U &,Fortran::semantics::ReassocRewriter::NonIntegralTag)' with [ S=Fortran::evaluate::value::Integer<128,true,32,unsigned int,unsigned __int64,128>, U=Fortran::evaluate::value::Integer<128,true,32,unsigned int,unsigned __int64,128> ] ..\llvm-project\flang\include\flang/Evaluate/rewrite.h(78): note: while trying to match the argument list '(Fortran::evaluate::Expr, const S)' with [ S=Fortran::evaluate::value::Integer<128,true,32,unsigned int,unsigned __int64,128> ] ..\llvm-project\flang\include\flang/Evaluate/rewrite.h(78): note: the template instantiation context (the oldest one first) is ..\llvm-project\flang\lib\Semantics\check-omp-atomic.cpp(814): note: see reference to function template instantiation 'U Fortran::evaluate::rewrite::Mutator::operator ()&,Fortran::evaluate::Expr>(T)' being compiled with [ U=Fortran::evaluate::Expr, T=const Fortran::evaluate::Expr & ] ``` The reason is that there is an ambiguity between operator() of ReassocRewriter itself and operator() of the base 
class Identity through `using Id::operator();`. By the C++ specification, method declarations in ReassocRewriter hide methods with the same signature from a using declaration, but this does not apply to ``` evaluate::Expr operator()(..., NonIntegralTag = {}) ``` which has a different signature due to an additional tag parameter. Since it has a default value, it is ambiguous with operator() without tag parameter. GCC and Clang both accept this, but in my understanding MSVC is correct here. Since the overloads of ReassocRewriter cover all cases, remove the using declaration to avoid the ambiguity. --- flang/lib/Semantics/check-omp-atomic.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp index 62bb2fdc6048e..50e63d356be02 100644 --- a/flang/lib/Semantics/check-omp-atomic.cpp +++ b/flang/lib/Semantics/check-omp-atomic.cpp @@ -86,7 +86,6 @@ ReassocOp reassocOp(const Op0 &op0, const Op1 &op1) { struct ReassocRewriter : public evaluate::rewrite::Identity { using Id = evaluate::rewrite::Identity; - using Id::operator(); struct NonIntegralTag {}; ReassocRewriter(const SomeExpr &atom) : atom_(atom) {} From 036ee24936a7d77f91a7690a3d63c932be7298a1 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Fri, 14 Mar 2025 11:06:51 -0400 Subject: [PATCH 02/64] Initial implementation of tiling. 
--- flang/include/flang/Lower/OpenMP.h | 1 - flang/lib/Lower/OpenMP/OpenMP.cpp | 70 +++++-- flang/lib/Lower/OpenMP/Utils.cpp | 33 +++- flang/lib/Semantics/canonicalize-omp.cpp | 44 ++++- flang/lib/Semantics/resolve-directives.cpp | 176 ++++++++++++++---- .../Frontend/OpenMP/ConstructDecompositionT.h | 24 +++ .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 9 + llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 24 +++ llvm/lib/Transforms/Utils/CodeExtractor.cpp | 7 +- .../mlir/Dialect/OpenMP/OpenMPClauses.td | 32 ++++ mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 +- .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 3 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 52 +++++- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 65 +++++-- 14 files changed, 458 insertions(+), 86 deletions(-) diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h index 581c93f76d627..df01a7b82c66c 100644 --- a/flang/include/flang/Lower/OpenMP.h +++ b/flang/include/flang/Lower/OpenMP.h @@ -80,7 +80,6 @@ void genOpenMPDeclarativeConstruct(AbstractConverter &, void genOpenMPSymbolProperties(AbstractConverter &converter, const pft::Variable &var); -int64_t getCollapseValue(const Fortran::parser::OmpClauseList &clauseList); void genThreadprivateOp(AbstractConverter &, const pft::Variable &); void genDeclareTargetIntGlobal(AbstractConverter &, const pft::Variable &); bool isOpenMPTargetConstruct(const parser::OpenMPConstruct &); diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index ec2ec37e623f8..a01bb371411ad 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -404,6 +404,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, return; const parser::OmpClauseList *beginClauseList = nullptr; + const parser::OmpClauseList *middleClauseList = nullptr; const parser::OmpClauseList *endClauseList = nullptr; common::visit( common::visitors{ @@ -418,6 +419,22 @@ static void 
processHostEvalClauses(lower::AbstractConverter &converter, beginClauseList = &std::get(beginDirective.t); + // FIXME(JAN): For now we check if there is an inner + // OpenMPLoopConstruct, and extract the size clause from there + const auto &innerOptional = std::get>>( + ompConstruct.t); + if (innerOptional.has_value()) { + const auto &innerLoopDirective = innerOptional.value().value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t); + if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { + middleClauseList = + &std::get(innerBegin.t); + } + } if (auto &endDirective = std::get>( ompConstruct.t)) { @@ -431,6 +448,9 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, assert(beginClauseList && "expected begin directive"); clauses.append(makeClauses(*beginClauseList, semaCtx)); + if (middleClauseList) + clauses.append(makeClauses(*middleClauseList, semaCtx)); + if (endClauseList) clauses.append(makeClauses(*endClauseList, semaCtx)); }; @@ -910,6 +930,7 @@ static void genLoopVars( storeOp = createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); } + firOpBuilder.setInsertionPointAfter(storeOp); } @@ -1660,6 +1681,23 @@ genLoopNestClauses(lower::AbstractConverter &converter, cp.processCollapse(loc, eval, clauseOps, iv); clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); + + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + for (auto &clause : clauses) { + if (clause.id == llvm::omp::Clause::OMPC_collapse) { + const auto &collapse = std::get(clause.u); + int64_t collapseValue = evaluate::ToInt64(collapse.v).value(); + clauseOps.numCollapse = firOpBuilder.getI64IntegerAttr(collapseValue); + } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { + const auto &sizes = std::get(clause.u); + llvm::SmallVector sizeValues; + for (auto &size : sizes.v) { + int64_t sizeValue = evaluate::ToInt64(size).value(); + 
sizeValues.push_back(sizeValue); + } + clauseOps.tileSizes = sizeValues; + } + } } static void genLoopClauses( @@ -2036,9 +2074,9 @@ static mlir::omp::LoopNestOp genLoopNestOp( return llvm::SmallVector(iv); }; - auto *nestedEval = - getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); - + uint64_t nestValue = getCollapseValue(item->clauses); + nestValue = nestValue < iv.size() ? iv.size() : nestValue; + auto *nestedEval = getCollapsedLoopEval(eval, nestValue); return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, directive) @@ -3897,6 +3935,20 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, std::get(loopConstruct.t); List clauses = makeClauses( std::get(beginLoopDirective.t), semaCtx); + + const auto &innerOptional = std::get>>(loopConstruct.t); + if (innerOptional.has_value()) { + const auto &innerLoopDirective = innerOptional.value().value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t); + if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { + clauses.append( + makeClauses(std::get(innerBegin.t), semaCtx)); + } + } + if (auto &endLoopDirective = std::get>( loopConstruct.t)) { @@ -4028,18 +4080,6 @@ void Fortran::lower::genOpenMPSymbolProperties( lower::genDeclareTargetIntGlobal(converter, var); } -int64_t -Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) { - for (const parser::OmpClause &clause : clauseList.v) { - if (const auto &collapseClause = - std::get_if(&clause.u)) { - const auto *expr = semantics::GetExpr(collapseClause->v); - return evaluate::ToInt64(*expr).value(); - } - } - return 1; -} - void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter, const lower::pft::Variable &var) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 77b1e39083aa6..11721d05001b0 
100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -38,14 +38,22 @@ namespace lower { namespace omp { int64_t getCollapseValue(const List &clauses) { - auto iter = llvm::find_if(clauses, [](const Clause &clause) { - return clause.id == llvm::omp::Clause::OMPC_collapse; - }); - if (iter != clauses.end()) { - const auto &collapse = std::get(iter->u); - return evaluate::ToInt64(collapse.v).value(); + int64_t collapseValue = 1; + int64_t numTileSizes = 0; + for (auto &clause : clauses) { + if (clause.id == llvm::omp::Clause::OMPC_collapse) { + const auto &collapse = std::get(clause.u); + collapseValue = evaluate::ToInt64(collapse.v).value(); + } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { + const auto &sizes = std::get(clause.u); + numTileSizes = sizes.v.size(); + } } - return 1; + + collapseValue = collapseValue - numTileSizes; + int64_t result = + collapseValue > numTileSizes ? collapseValue : numTileSizes; + return result; } void genObjectList(const ObjectList &objects, @@ -613,6 +621,7 @@ bool collectLoopRelatedInfo( lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { + bool found = false; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -628,7 +637,16 @@ bool collectLoopRelatedInfo( collapseValue = evaluate::ToInt64(clause->v).value(); found = true; } + std::int64_t sizesLengthValue = 0l; + if (auto *clause = + ClauseFinder::findUniqueClause(clauses)) { + sizesLengthValue = clause->v.size(); + found = true; + } + collapseValue = collapseValue - sizesLengthValue; + collapseValue = + collapseValue < sizesLengthValue ? 
sizesLengthValue : collapseValue; std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = @@ -661,7 +679,6 @@ bool collectLoopRelatedInfo( } while (collapseValue > 0); convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); - return found; } diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 9722eca19447d..fb0bb0f923574 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -11,6 +11,7 @@ #include "flang/Parser/parse-tree.h" #include "flang/Semantics/semantics.h" +# include // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP // Constructs more structured which provide explicit scopes for later // structural checks and semantic analysis. @@ -117,15 +118,17 @@ class CanonicalizationOfOmp { // in the same iteration // // Original: - // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct - // OmpBeginLoopDirective + // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> + // OmpBeginLoopDirective t-> OmpLoopDirective + // [ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct u-> + /// OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile] // ExecutableConstruct -> DoConstruct + // [ExecutableConstruct -> OmpEndLoopDirective] // ExecutableConstruct -> OmpEndLoopDirective (if available) // // After rewriting: - // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct - // OmpBeginLoopDirective - // DoConstruct + // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> + // OmpBeginLoopDirective t -> OmpLoopDirective -> DoConstruct // OmpEndLoopDirective (if available) parser::Block::iterator nextIt; auto &beginDir{std::get(x.t)}; @@ -147,20 +150,41 @@ class CanonicalizationOfOmp { if (GetConstructIf(*nextIt)) continue; + // Keep track of the loops to handle the end loop directives + std::stack loops; + loops.push(&x); + while (auto *innerConstruct{ + 
GetConstructIf(*nextIt)}) { + if (auto *innerOmpLoop{ + std::get_if(&innerConstruct->u)}) { + std::get< + std::optional>>( + loops.top()->t) = std::move(*innerOmpLoop); + // Retrieveing the address so that DoConstruct or inner loop can be + // set later. + loops.push(&(std::get>>( + loops.top()->t) + .value() + .value())); + nextIt = block.erase(nextIt); + } + } if (auto *doCons{GetConstructIf(*nextIt)}) { if (doCons->GetLoopControl()) { // move DoConstruct std::get>>>(x.t) = - std::move(*doCons); + common::Indirection>>>( + loops.top()->t) = std::move(*doCons); nextIt = block.erase(nextIt); // try to match OmpEndLoopDirective - if (nextIt != block.end()) { + while (nextIt != block.end() && !loops.empty()) { if (auto *endDir{ GetConstructIf(*nextIt)}) { - std::get>(x.t) = - std::move(*endDir); + std::get>( + loops.top()->t) = std::move(*endDir); nextIt = block.erase(nextIt); + loops.pop(); } } } else { diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 6a4660c9882ab..0e87dcfdbbbb8 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -817,7 +817,28 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { const parser::OmpClause *GetAssociatedClause() { return associatedClause; } private: + std::int64_t SetAssociatedMaxClause(llvm::SmallVector &, + llvm::SmallVector &); + std::int64_t GetAssociatedLoopLevelFromLoopConstruct( + const parser::OpenMPLoopConstruct &); std::int64_t GetAssociatedLoopLevelFromClauses(const parser::OmpClauseList &); + void CollectAssociatedLoopLevelsFromLoopConstruct( + const parser::OpenMPLoopConstruct &, llvm::SmallVector &, + llvm::SmallVector &); + void CollectAssociatedLoopLevelsFromInnerLoopContruct( + const parser::OpenMPLoopConstruct &, llvm::SmallVector &, + llvm::SmallVector &); + template + void CollectAssociatedLoopLevelFromClauseValue( + const parser::OmpClause &clause, llvm::SmallVector &, + llvm::SmallVector &); + 
template + void CollectAssociatedLoopLevelFromClauseSize(const parser::OmpClause &, + llvm::SmallVector &, + llvm::SmallVector &); + void CollectAssociatedLoopLevelsFromClauses(const parser::OmpClauseList &, + llvm::SmallVector &, + llvm::SmallVector &); Symbol::Flags dataSharingAttributeFlags{Symbol::Flag::OmpShared, Symbol::Flag::OmpPrivate, Symbol::Flag::OmpFirstPrivate, @@ -1822,7 +1843,6 @@ bool OmpAttributeVisitor::Pre( bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { const auto &beginLoopDir{std::get(x.t)}; const auto &beginDir{std::get(beginLoopDir.t)}; - const auto &clauseList{std::get(beginLoopDir.t)}; switch (beginDir.v) { case llvm::omp::Directive::OMPD_distribute: case llvm::omp::Directive::OMPD_distribute_parallel_do: @@ -1873,7 +1893,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { beginDir.v == llvm::omp::Directive::OMPD_target_loop) IssueNonConformanceWarning(beginDir.v, beginDir.source, 52); ClearDataSharingAttributeObjects(); - SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromClauses(clauseList)); + SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromLoopConstruct(x)); if (beginDir.v == llvm::omp::Directive::OMPD_do) { auto &optLoopCons = std::get>(x.t); @@ -1887,7 +1907,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { } } PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x); - ordCollapseLevel = GetAssociatedLoopLevelFromClauses(clauseList) + 1; + ordCollapseLevel = GetAssociatedLoopLevelFromLoopConstruct(x) + 1; return true; } @@ -1975,44 +1995,124 @@ bool OmpAttributeVisitor::Pre(const parser::DoConstruct &x) { return true; } +static bool isSizesClause(const parser::OmpClause *clause) { + return std::holds_alternative(clause->u); +} + +std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( + llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + + // Find the tile level to know how much to reduce the level for collapse + std::int64_t tileLevel = 0; + for 
(auto [level, clause] : llvm::zip_equal(levels, clauses)) { + if (isSizesClause(clause)) { + tileLevel = level; + } + } + + std::int64_t maxLevel = 1; + const parser::OmpClause *maxClause = nullptr; + for (auto [level, clause] : llvm::zip_equal(levels, clauses)) { + if (tileLevel > 0 && tileLevel < level) { + context_.Say(clause->source, + "The value of the parameter in the COLLAPSE clause must" + " not be larger than the number of the number of tiled loops" + " because collapse relies on independent loop iterations."_err_en_US); + return 1; + } + + if (!isSizesClause(clause)) { + level = level - tileLevel; + } + + if (level > maxLevel) { + maxLevel = level; + maxClause = clause; + } + } + if (maxClause) + SetAssociatedClause(maxClause); + return maxLevel; +} + +std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromLoopConstruct( + const parser::OpenMPLoopConstruct &x) { + llvm::SmallVector levels; + llvm::SmallVector clauses; + + CollectAssociatedLoopLevelsFromLoopConstruct(x, levels, clauses); + return SetAssociatedMaxClause(levels, clauses); +} + std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses( const parser::OmpClauseList &x) { - std::int64_t orderedLevel{0}; - std::int64_t collapseLevel{0}; + llvm::SmallVector levels; + llvm::SmallVector clauses; - const parser::OmpClause *ordClause{nullptr}; - const parser::OmpClause *collClause{nullptr}; + CollectAssociatedLoopLevelsFromClauses(x, levels, clauses); + return SetAssociatedMaxClause(levels, clauses); +} - for (const auto &clause : x.v) { - if (const auto *orderedClause{ - std::get_if(&clause.u)}) { - if (const auto v{EvaluateInt64(context_, orderedClause->v)}) { - orderedLevel = *v; - } - ordClause = &clause; - } - if (const auto *collapseClause{ - std::get_if(&clause.u)}) { - if (const auto v{EvaluateInt64(context_, collapseClause->v)}) { - collapseLevel = *v; - } - collClause = &clause; +void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromLoopConstruct( + const 
parser::OpenMPLoopConstruct &x, + llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + const auto &beginLoopDir{std::get(x.t)}; + const auto &clauseList{std::get(beginLoopDir.t)}; + + CollectAssociatedLoopLevelsFromClauses(clauseList, levels, clauses); + CollectAssociatedLoopLevelsFromInnerLoopContruct(x, levels, clauses); +} + +void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct( + const parser::OpenMPLoopConstruct &x, + llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + const auto &innerOptional = + std::get>>( + x.t); + if (innerOptional.has_value()) { + CollectAssociatedLoopLevelsFromLoopConstruct( + innerOptional.value().value(), levels, clauses); + } +} + +template +void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseValue( + const parser::OmpClause &clause, llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + if (const auto tclause{std::get_if(&clause.u)}) { + std::int64_t level = 0; + if (const auto v{EvaluateInt64(context_, tclause->v)}) { + level = *v; } + levels.push_back(level); + clauses.push_back(&clause); } +} - if (orderedLevel && (!collapseLevel || orderedLevel >= collapseLevel)) { - SetAssociatedClause(ordClause); - return orderedLevel; - } else if (!orderedLevel && collapseLevel) { - SetAssociatedClause(collClause); - return collapseLevel; - } else { - SetAssociatedClause(nullptr); +template +void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseSize( + const parser::OmpClause &clause, llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + if (const auto tclause{std::get_if(&clause.u)}) { + levels.push_back(tclause->v.size()); + clauses.push_back(&clause); } - // orderedLevel < collapseLevel is an error handled in structural - // checks +} - return 1; // default is outermost loop +void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromClauses( + const parser::OmpClauseList &x, llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + for (const auto &clause : x.v) { + 
CollectAssociatedLoopLevelFromClauseValue( + clause, levels, clauses); + CollectAssociatedLoopLevelFromClauseValue( + clause, levels, clauses); + CollectAssociatedLoopLevelFromClauseSize( + clause, levels, clauses); + } } // 2.15.1.1 Data-sharing Attribute Rules - Predetermined @@ -2044,10 +2144,18 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( const parser::OmpClause *clause{GetAssociatedClause()}; bool hasCollapseClause{ clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false}; + const parser::OpenMPLoopConstruct *innerMostLoop = &x; + + while (auto &optLoopCons{ + std::get>(x.t)}) { + if (const auto &innerLoop{ + std::get_if < parser::OpenMPLoopConstruct >>> (innerMostLoop->t)}) { + innerMostLoop = &innerLoop.value().value(); + } + } - auto &optLoopCons = std::get>(x.t); if (optLoopCons.has_value()) { - if (const auto &outer{std::get_if(&*optLoopCons)}) { + if (const auto &outer{std::get_if(innerMostLoop->t)}) { for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; --level) { if (loop->IsDoConcurrent()) { diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 047baa3a79f5d..83db78667c7f8 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -209,6 +209,8 @@ struct ConstructDecompositionT { bool applyClause(const tomp::clause::CollapseT &clause, const ClauseTy *); + bool applyClause(const tomp::clause::SizesT &clause, + const ClauseTy *); bool applyClause(const tomp::clause::PrivateT &clause, const ClauseTy *); bool @@ -482,6 +484,28 @@ bool ConstructDecompositionT::applyClause( return false; } +// FIXME(JAN): Do the correct thing, but for now we'll do the same as collapse +template +bool ConstructDecompositionT::applyClause( + const tomp::clause::SizesT &clause, + const ClauseTy *node) { + // Apply "sizes" to the innermost directive. 
If it's not one that + // allows it flag an error. + if (!leafs.empty()) { + auto &last = leafs.back(); + + if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) { + last.clauses.push_back(node); + return true; + } else { + llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n"; + last.clauses.push_back(node); + return true; + } + } + + return false; +} // PRIVATE // [5.2:111:5-7] diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 1050e3d8b08dd..a994f23c1fbe2 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2257,6 +2257,9 @@ class OpenMPIRBuilder { /// Return the function that contains the region to be outlined. Function *getFunction() const { return EntryBB->getParent(); } + + /// Dump the info in a somewhat readable way + void dump(); }; /// Collection of regions that need to be outlined during finalization. @@ -2277,6 +2280,9 @@ class OpenMPIRBuilder { /// Add a new region that will be outlined later. void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } + /// Dump outline infos + void dumpOutlineInfos(); + /// An ordered map of auto-generated variables to their unique names. /// It stores variables with the following names: 1) ".gomp_critical_user_" + /// + ".var" for "omp critical" directives; 2) @@ -3910,6 +3916,9 @@ class CanonicalLoopInfo { /// Invalidate this loop. That is, the underlying IR does not fulfill the /// requirements of an OpenMP canonical loop anymore. LLVM_ABI void invalidate(); + + /// Dump the info in a somewhat readable way + void dump(); }; /// ScanInfo holds the information to assist in lowering of Scan reduction. 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 989bcf45e0006..ff50dfbbd5259 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -9145,6 +9145,15 @@ Error OpenMPIRBuilder::emitOffloadingArrays( return Error::success(); } +void OpenMPIRBuilder::dumpOutlineInfos() { + errs() << "=== Outline Infos Begin ===\n"; + for (auto En : enumerate(OutlineInfos)) { + errs() << "[" << En.index() << "]: "; + En.value().dump(); + } + errs() << "=== Outline Infos End ===\n"; +} + void OpenMPIRBuilder::emitBranch(BasicBlock *Target) { BasicBlock *CurBB = Builder.GetInsertBlock(); @@ -10069,6 +10078,14 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks( } } +void OpenMPIRBuilder::OutlineInfo::dump() { + errs() << "=== OutilneInfo == " + << " EntryBB: " << (EntryBB ? EntryBB->getName() : "n\a") + << " ExitBB: " << (ExitBB ? ExitBB->getName() : "n\a") + << " OuterAllocaBB: " + << (OuterAllocaBB ? OuterAllocaBB->getName() : "n/a") << "\n"; +} + void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, @@ -10846,3 +10863,10 @@ void CanonicalLoopInfo::invalidate() { Latch = nullptr; Exit = nullptr; } + +void CanonicalLoopInfo::dump() { + errs() << "CanonicaLoop == Header: " << (Header ? Header->getName() : "n/a") + << " Cond: " << (Cond ? Cond->getName() : "n/a") + << " Latch: " << (Latch ? Latch->getName() : "n/a") + << " Exit: " << (Exit ? 
Exit->getName() : "n/a") << "\n"; +} diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index bbd1ed6a3ab2d..7ad1e70cb6e75 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -810,7 +810,11 @@ void CodeExtractor::severSplitPHINodesOfExits() { } void CodeExtractor::splitReturnBlocks() { - for (BasicBlock *Block : Blocks) + for (BasicBlock *Block : Blocks) { + if (!Block->getTerminator()) { + errs() << "====== No terminator in block: " << Block->getName() + << "======\n"; + } if (ReturnInst *RI = dyn_cast(Block->getTerminator())) { BasicBlock *New = Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); @@ -827,6 +831,7 @@ void CodeExtractor::splitReturnBlocks() { DT->changeImmediateDominator(I, NewNode); } } + } } Function *CodeExtractor::constructFunctionDeclaration( diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index 311c57fb4446c..eb836db890738 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -317,6 +317,38 @@ class OpenMP_DeviceClauseSkip< def OpenMP_DeviceClause : OpenMP_DeviceClauseSkip<>; +//===----------------------------------------------------------------------===// +// V5.2: [XX.X] `collapse` clause +//===----------------------------------------------------------------------===// + +class OpenMP_CollapseClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + DefaultValuedOptionalAttr:$num_collapse + ); +} + +def OpenMP_CollapseClause : OpenMP_CollapseClauseSkip<>; + +//===----------------------------------------------------------------------===// +// V5.2: [xx.x] `sizes` clause +//===----------------------------------------------------------------------===// 
+ +class OpenMP_TileSizesClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + OptionalAttr:$tile_sizes + ); +} + +def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; + //===----------------------------------------------------------------------===// // V5.2: [11.6.1] `dist_schedule` clause //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index c956d69781b3d..ee555b695c2ad 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -614,7 +614,9 @@ def WorkshareLoopWrapperOp : OpenMP_Op<"workshare.loop_wrapper", traits = [ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ RecursiveMemoryEffects, SameVariadicOperandSize ], clauses = [ - OpenMP_LoopRelatedClause + OpenMP_LoopRelatedClause, + OpenMP_CollapseClause, + OpenMP_TileSizesClause ], singleRegion = true> { let summary = "rectangular loop nest"; let description = [{ diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index 34f372af1e4b5..bec17258d058f 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -493,7 +493,8 @@ struct ParallelOpLowering : public OpRewritePattern { // Create loop nest and populate region with contents of scf.parallel. 
auto loopOp = omp::LoopNestOp::create( rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(), - parallelOp.getUpperBound(), parallelOp.getStep()); + parallelOp.getUpperBound(), parallelOp.getStep(), false, 1, + nullptr); rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(), loopOp.getRegion().begin()); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index fa94219016c1e..d2d3362f23a2f 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -56,6 +56,11 @@ makeDenseBoolArrayAttr(MLIRContext *ctx, const ArrayRef boolArray) { return boolArray.empty() ? nullptr : DenseBoolArrayAttr::get(ctx, boolArray); } +static DenseI64ArrayAttr +makeDenseI64ArrayAttr(MLIRContext *ctx, const ArrayRef intArray) { + return intArray.empty() ? nullptr : DenseI64ArrayAttr::get(ctx, intArray); +} + namespace { struct MemRefPointerLikeModel : public PointerLikeType::ExternalModel steps; @@ -2967,6 +2972,38 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { parser.parseOperandList(steps, ivs.size(), OpAsmParser::Delimiter::Paren)) return failure(); + // Parse collapse + int64_t value = 0; + if (!parser.parseOptionalKeyword("collapse") && + (parser.parseLParen() || parser.parseInteger(value) || + parser.parseRParen())) + return failure(); + if (value > 1) { + result.addAttribute( + "num_collapse", + IntegerAttr::get(parser.getBuilder().getI64Type(), value)); + } + + // Parse tiles + SmallVector tiles; + auto parseTiles = [&]() -> ParseResult { + int64_t tile; + if (parser.parseInteger(tile)) + return failure(); + tiles.push_back(tile); + return success(); + }; + + if (!parser.parseOptionalKeyword("tiles") && + (parser.parseLParen() || + parser.parseCommaSeparatedList(parseTiles) || + parser.parseRParen())) + return failure(); + + if (tiles.size() > 0) { + result.addAttribute("tile_sizes", DenseI64ArrayAttr::get(ctx, tiles)); + } + 
// Parse the body. Region *region = result.addRegion(); if (parser.parseRegion(*region, ivs)) @@ -2990,14 +3027,23 @@ void LoopNestOp::print(OpAsmPrinter &p) { if (getLoopInclusive()) p << "inclusive "; p << "step (" << getLoopSteps() << ") "; + if (int64_t numCollapse = getNumCollapse()) { + if (numCollapse > 1) + p << "collapse(" << numCollapse << ") "; + } + if (const auto tiles = getTileSizes()) { + p << "tiles(" << tiles.value() << ") "; + } p.printRegion(region, /*printEntryBlockArgs=*/false); } void LoopNestOp::build(OpBuilder &builder, OperationState &state, const LoopNestOperands &clauses) { + MLIRContext *ctx = builder.getContext(); LoopNestOp::build(builder, state, clauses.loopLowerBounds, clauses.loopUpperBounds, clauses.loopSteps, - clauses.loopInclusive); + clauses.loopInclusive, clauses.numCollapse, + makeDenseI64ArrayAttr(ctx, clauses.tileSizes)); } LogicalResult LoopNestOp::verify() { diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 6694de8383534..687688092f08e 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2966,10 +2966,9 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, /// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { + LLVM::ModuleTranslation &moduleTranslation) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); auto loopOp = cast(opInst); - // Set up the source location value for OpenMP runtime. llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); @@ -3035,18 +3034,60 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, loopInfos.push_back(*loopResult); } - // Collapse loops. 
Store the insertion point because LoopInfos may get - // invalidated. + // llvm::OpenMPIRBuilder::InsertPointTy afterIP = builder.saveIP(); llvm::OpenMPIRBuilder::InsertPointTy afterIP = - loopInfos.front()->getAfterIP(); + loopInfos.front()->getAfterIP(); - // Update the stack frame created for this loop to point to the resulting loop - // after applying transformations. - moduleTranslation.stackWalk( - [&](OpenMPLoopInfoStackFrame &frame) { - frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); - return WalkResult::interrupt(); - }); + // Initialize the new loop info to the current one, in case there + // are no loop transformations done. + llvm::CanonicalLoopInfo *NewTopLoopInfo = nullptr; + + // Do tiling + if (const auto &tiles = loopOp.getTileSizes()) { + llvm::Type *IVType = loopInfos.front()->getIndVarType(); + SmallVector TileSizes; + + for (auto tile : tiles.value()) { + llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile); + TileSizes.push_back(TileVal); + } + + std::vector NewLoops = + ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); + + // Collapse loops. Store the insertion point because LoopInfos may get + // invalidated. + auto AfterBB = NewLoops.front()->getAfter(); + auto AfterAfterBB = AfterBB->getSingleSuccessor(); + afterIP = {AfterAfterBB, AfterAfterBB->begin()}; + NewTopLoopInfo = NewLoops[0]; + + // Update the loop infos + loopInfos.clear(); + for (const auto& newLoop : NewLoops) { + loopInfos.push_back(newLoop); + } + } // Tiling done + + // Do collapse + if (const auto &numCollapse = loopOp.getNumCollapse()) { + SmallVector collapseLoopInfos( + loopInfos.begin(), loopInfos.begin() + (numCollapse)); + + auto newLoopInfo = + ompBuilder->collapseLoops(ompLoc.DL, collapseLoopInfos, {}); + NewTopLoopInfo = newLoopInfo; + } // Collapse done + + // Update the stack frame created for this loop to point to the resulting + // loop after applying transformations. 
+ if (NewTopLoopInfo) { + moduleTranslation.stackWalk( + [&](OpenMPLoopInfoStackFrame &frame) { + frame.loopInfo = NewTopLoopInfo; + return WalkResult::interrupt(); + }); + } // Continue building IR after the loop. Note that the LoopInfo returned by // `collapseLoops` points inside the outermost loop and is intended for From 69cfb0390e270d6ac1db824bb46e6b78230ada8e Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 10 Jun 2025 09:51:02 -0400 Subject: [PATCH 03/64] Fix tests and limit the nesting of construct to only tiling. --- flang/lib/Semantics/canonicalize-omp.cpp | 34 ++++++++++++------- .../Lower/OpenMP/parallel-wsloop-lastpriv.f90 | 4 +-- flang/test/Lower/OpenMP/simd.f90 | 2 +- flang/test/Lower/OpenMP/wsloop-variable.f90 | 2 +- flang/test/Semantics/OpenMP/do-collapse.f90 | 1 + .../LLVMIR/omptarget-wsloop-collapsed.mlir | 2 +- mlir/test/Target/LLVMIR/openmp-llvm.mlir | 12 +++---- 7 files changed, 33 insertions(+), 24 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index fb0bb0f923574..10eaaa83f5f4f 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -149,27 +149,32 @@ class CanonicalizationOfOmp { // Ignore compiler directives. if (GetConstructIf(*nextIt)) continue; - // Keep track of the loops to handle the end loop directives std::stack loops; loops.push(&x); - while (auto *innerConstruct{ + if (auto *innerConstruct{ GetConstructIf(*nextIt)}) { if (auto *innerOmpLoop{ std::get_if(&innerConstruct->u)}) { - std::get< - std::optional>>( - loops.top()->t) = std::move(*innerOmpLoop); - // Retrieveing the address so that DoConstruct or inner loop can be - // set later. 
- loops.push(&(std::get>>( - loops.top()->t) - .value() - .value())); - nextIt = block.erase(nextIt); + auto &innerBeginDir{ + std::get(innerOmpLoop->t)}; + auto &innerDir{std::get(innerBeginDir.t)}; + if (innerDir.v == llvm::omp::Directive::OMPD_tile) { + std::get>>( + loops.top()->t) = std::move(*innerOmpLoop); + // Retrieveing the address so that DoConstruct or inner loop can be + // set later. + loops.push(&(std::get>>( + loops.top()->t) + .value() + .value())); + nextIt = block.erase(nextIt); + } } } + if (auto *doCons{GetConstructIf(*nextIt)}) { if (doCons->GetLoopControl()) { // move DoConstruct @@ -185,6 +190,9 @@ class CanonicalizationOfOmp { loops.top()->t) = std::move(*endDir); nextIt = block.erase(nextIt); loops.pop(); + } else { + // If there is a mismatch bail out. + break; } } } else { diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 index 2890e78e9d17f..faf8f717f6308 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 @@ -108,7 +108,7 @@ subroutine omp_do_lastprivate_collapse2(a) ! CHECK-NEXT: %[[UB2:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref ! CHECK-NEXT: %[[STEP2:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]] : !fir.ref, !fir.ref, !fir.ref) { - ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) { + ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) collapse(2) { ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ea"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -174,7 +174,7 @@ subroutine omp_do_lastprivate_collapse3(a) ! CHECK-NEXT: %[[UB3:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref ! CHECK-NEXT: %[[STEP3:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[K_PVT_REF:.*]] : !fir.ref, !fir.ref, !fir.ref, !fir.ref) { - ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) { + ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) collapse(3) { ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ea"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90 index d815474b84b31..3572b9baff00b 100644 --- a/flang/test/Lower/OpenMP/simd.f90 +++ b/flang/test/Lower/OpenMP/simd.f90 @@ -175,7 +175,7 @@ subroutine simd_with_collapse_clause(n) ! CHECK-NEXT: omp.loop_nest (%[[ARG_0:.*]], %[[ARG_1:.*]]) : i32 = ( ! CHECK-SAME: %[[LOWER_I]], %[[LOWER_J]]) to ( ! 
CHECK-SAME: %[[UPPER_I]], %[[UPPER_J]]) inclusive step ( - ! CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) { + ! CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) collapse(2) { !$OMP SIMD COLLAPSE(2) do i = 1, n do j = 1, n diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90 index a7fb5fb8936e7..cceb77b974fee 100644 --- a/flang/test/Lower/OpenMP/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/wsloop-variable.f90 @@ -23,7 +23,7 @@ program wsloop_variable !CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64 !CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64 !CHECK: omp.wsloop private({{.*}}) { -!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { +!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) collapse(2) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16 !CHECK: hlfir.assign %[[ARG0_I16]] to %[[STORE_IV0:.*]]#0 : i16, !fir.ref !CHECK: hlfir.assign %[[ARG1]] to %[[STORE_IV1:.*]]#0 : i64, !fir.ref diff --git a/flang/test/Semantics/OpenMP/do-collapse.f90 b/flang/test/Semantics/OpenMP/do-collapse.f90 index 480bd45b79b83..ec6a3bdad3686 100644 --- a/flang/test/Semantics/OpenMP/do-collapse.f90 +++ b/flang/test/Semantics/OpenMP/do-collapse.f90 @@ -31,6 +31,7 @@ program omp_doCollapse end do end do + !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct. 
!ERROR: At most one COLLAPSE clause can appear on the SIMD directive !$omp simd collapse(2) collapse(1) do i = 1, 4 diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir index b42e387acbb11..d84641ff9c99b 100644 --- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir @@ -9,7 +9,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : index) : i32 omp.wsloop { - omp.loop_nest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) { + omp.loop_nest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) collapse(2) { %1 = llvm.add %arg1, %arg2 : i32 %2 = llvm.mul %arg2, %loop_ub overflow : i32 %3 = llvm.add %arg1, %2 :i32 diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 3f4dcd5e24c56..27210bc0890ce 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -698,7 +698,7 @@ llvm.func @simd_simple(%lb : i64, %ub : i64, %step : i64, %arg0: !llvm.ptr) { // CHECK-LABEL: @simd_simple_multiple llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) inclusive step (%step1, %step2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) inclusive step (%step1, %step2) collapse(2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 // The form of the emitted IR is controlled by OpenMPIRBuilder and // tested there. 
Just check that the right metadata is added and collapsed @@ -736,7 +736,7 @@ llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64 // CHECK-LABEL: @simd_simple_multiple_simdlen llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd simdlen(2) { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 // The form of the emitted IR is controlled by OpenMPIRBuilder and // tested there. Just check that the right metadata is added. @@ -760,7 +760,7 @@ llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l // CHECK-LABEL: @simd_simple_multiple_safelen llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd safelen(2) { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr, i64) -> !llvm.ptr, f32 %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr, i64) -> !llvm.ptr, f32 @@ -779,7 +779,7 @@ llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l // CHECK-LABEL: @simd_simple_multiple_simdlen_safelen llvm.func @simd_simple_multiple_simdlen_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd simdlen(1) safelen(2) { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) 
collapse(2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr, i64) -> !llvm.ptr, f32 %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr, i64) -> !llvm.ptr, f32 @@ -1177,7 +1177,7 @@ llvm.func @collapse_wsloop( // CHECK: store i32 %[[TOTAL_SUB_1]], ptr // CHECK: call void @__kmpc_for_static_init_4u omp.wsloop { - omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) { + omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) { %31 = llvm.load %20 : !llvm.ptr -> i32 %32 = llvm.add %31, %arg0 : i32 %33 = llvm.add %32, %arg1 : i32 @@ -1239,7 +1239,7 @@ llvm.func @collapse_wsloop_dynamic( // CHECK: store i32 %[[TOTAL]], ptr // CHECK: call void @__kmpc_dispatch_init_4u omp.wsloop schedule(dynamic) { - omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) { + omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) { %31 = llvm.load %20 : !llvm.ptr -> i32 %32 = llvm.add %31, %arg0 : i32 %33 = llvm.add %32, %arg1 : i32 From 34888b13ab38713b00ce64fe908286592d84434d Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 10 Jun 2025 10:18:32 -0400 Subject: [PATCH 04/64] Enable stand-alone tiling, but it gives a warning and converts to simd.
--- flang/lib/Lower/OpenMP/OpenMP.cpp | 42 ++++++++++++++++++--- flang/test/Lower/OpenMP/wsloop-collapse.f90 | 2 +- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index a01bb371411ad..4cfb9414e26e6 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2267,6 +2267,39 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, // Apply unrolling to it auto cli = canonLoop.getCli(); mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); + +static mlir::omp::LoopOp +genTiledLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::const_iterator item) { + mlir::omp::LoopOperands loopClauseOps; + llvm::SmallVector loopReductionSyms; + genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps, + loopReductionSyms); + + DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/true, symTable); + dsp.processStep1(&loopClauseOps); + + mlir::omp::LoopNestOperands loopNestClauseOps; + llvm::SmallVector iv; + genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, + loopNestClauseOps, iv); + + EntryBlockArgs loopArgs; + loopArgs.priv.syms = dsp.getDelayedPrivSymbols(); + loopArgs.priv.vars = loopClauseOps.privateVars; + loopArgs.reduction.syms = loopReductionSyms; + loopArgs.reduction.vars = loopClauseOps.reductionVars; + + auto loopOp = + genWrapperOp(converter, loc, loopClauseOps, loopArgs); + genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, + loopNestClauseOps, iv, {{loopOp, loopArgs}}, + llvm::omp::Directive::OMPD_loop, dsp); + return loopOp; } static mlir::omp::MaskedOp @@ -3487,13 +3520,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, newOp 
= genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; - case llvm::omp::Directive::OMPD_tile: { - unsigned version = semaCtx.langOptions().OpenMPVersion; - if (!semaCtx.langOptions().OpenMPSimd) - TODO(loc, "Unhandled loop directive (" + - llvm::omp::getOpenMPDirectiveName(dir, version) + ")"); + case llvm::omp::Directive::OMPD_tile: + newOp = + genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); break; - } case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; diff --git a/flang/test/Lower/OpenMP/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/wsloop-collapse.f90 index 7ec40ab4b2f43..677c7809c397f 100644 --- a/flang/test/Lower/OpenMP/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/wsloop-collapse.f90 @@ -57,7 +57,7 @@ program wsloop_collapse !CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref !CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_4:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_2:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_0:.*]] : !fir.ref, !fir.ref, !fir.ref) { -!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) { +!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) collapse(3) { !$omp do collapse(3) do i = 1, a do j= 1, b From 79270accfdaeca0dfcb6888d42c14834367ab2e8 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 11 Jun 2025 10:25:00 -0400 Subject: [PATCH 05/64] Add minimal test, remove debug print. 
--- flang/test/Lower/OpenMP/wsloop-tile.f90 | 30 +++++++++++++++++++ .../Frontend/OpenMP/ConstructDecompositionT.h | 2 +- 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 flang/test/Lower/OpenMP/wsloop-tile.f90 diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90 new file mode 100644 index 0000000000000..f43b558ce46bb --- /dev/null +++ b/flang/test/Lower/OpenMP/wsloop-tile.f90 @@ -0,0 +1,30 @@ +! This test checks lowering of OpenMP DO Directive(Worksharing) with collapse. + +! RUN: bbc -fopenmp -fopenmp-version=51 -emit-hlfir %s -o - | FileCheck %s + +!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "wsloop_tile"} { +program wsloop_tile + integer :: i, j, k + integer :: a, b, c + integer :: x + + a=30 + b=20 + c=50 + x=0 + + !CHECK: omp.loop_nest + !CHECK-SAME: tiles(2, 5, 10) + + !$omp do + !$omp tile sizes(2,5,10) + do i = 1, a + do j= 1, b + do k = 1, c + x = x + i + j + k + end do + end do + end do + !$omp end tile + !$omp end do +end program wsloop_tile diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 83db78667c7f8..e1083b7ef2cd9 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -498,7 +498,7 @@ bool ConstructDecompositionT::applyClause( last.clauses.push_back(node); return true; } else { - llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n"; + // llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n"; last.clauses.push_back(node); return true; } From dd943a84603be324af8cee2704565a05fe7fe297 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Fri, 13 Jun 2025 09:53:10 -0400 Subject: [PATCH 06/64] Fix formatting --- flang/lib/Lower/OpenMP/OpenMP.cpp | 27 +++++++++++-------- flang/lib/Lower/OpenMP/Utils.cpp | 3 +-- flang/lib/Semantics/canonicalize-omp.cpp | 4 +-- 
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 3 +-- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 12 ++++----- 5 files changed, 26 insertions(+), 23 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 4cfb9414e26e6..a19283286df41 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -427,9 +427,10 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, if (innerOptional.has_value()) { const auto &innerLoopDirective = innerOptional.value().value(); const auto &innerBegin = - std::get(innerLoopDirective.t); + std::get( + innerLoopDirective.t); const auto &innerDirective = - std::get(innerBegin.t); + std::get(innerBegin.t); if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { middleClauseList = &std::get(innerBegin.t); @@ -2268,11 +2269,13 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, auto cli = canonLoop.getCli(); mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); -static mlir::omp::LoopOp -genTiledLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item) { +static mlir::omp::LoopOp genTiledLoopOp(lower::AbstractConverter &converter, + lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { mlir::omp::LoopOperands loopClauseOps; llvm::SmallVector loopReductionSyms; genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps, @@ -3522,7 +3525,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_tile: newOp = - genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); + genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case 
llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); @@ -3966,13 +3969,15 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, List clauses = makeClauses( std::get(beginLoopDirective.t), semaCtx); - const auto &innerOptional = std::get>>(loopConstruct.t); + const auto &innerOptional = + std::get>>( + loopConstruct.t); if (innerOptional.has_value()) { const auto &innerLoopDirective = innerOptional.value().value(); const auto &innerBegin = - std::get(innerLoopDirective.t); + std::get(innerLoopDirective.t); const auto &innerDirective = - std::get(innerBegin.t); + std::get(innerBegin.t); if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { clauses.append( makeClauses(std::get(innerBegin.t), semaCtx)); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 11721d05001b0..69d74762ace6f 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -51,8 +51,7 @@ int64_t getCollapseValue(const List &clauses) { } collapseValue = collapseValue - numTileSizes; - int64_t result = - collapseValue > numTileSizes ? collapseValue : numTileSizes; + int64_t result = collapseValue > numTileSizes ? collapseValue : numTileSizes; return result; } diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 10eaaa83f5f4f..c519cb43628ed 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -11,7 +11,7 @@ #include "flang/Parser/parse-tree.h" #include "flang/Semantics/semantics.h" -# include +#include // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP // Constructs more structured which provide explicit scopes for later // structural checks and semantic analysis. 
@@ -153,7 +153,7 @@ class CanonicalizationOfOmp { std::stack loops; loops.push(&x); if (auto *innerConstruct{ - GetConstructIf(*nextIt)}) { + GetConstructIf(*nextIt)}) { if (auto *innerOmpLoop{ std::get_if(&innerConstruct->u)}) { auto &innerBeginDir{ diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index d2d3362f23a2f..3c65ddadff019 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -2995,8 +2995,7 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { }; if (!parser.parseOptionalKeyword("tiles") && - (parser.parseLParen() || - parser.parseCommaSeparatedList(parseTiles) || + (parser.parseLParen() || parser.parseCommaSeparatedList(parseTiles) || parser.parseRParen())) return failure(); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 687688092f08e..dccf241e919a7 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2966,7 +2966,7 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, /// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { + LLVM::ModuleTranslation &moduleTranslation) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); auto loopOp = cast(opInst); // Set up the source location value for OpenMP runtime. 
@@ -3036,7 +3036,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, // llvm::OpenMPIRBuilder::InsertPointTy afterIP = builder.saveIP(); llvm::OpenMPIRBuilder::InsertPointTy afterIP = - loopInfos.front()->getAfterIP(); + loopInfos.front()->getAfterIP(); // Initialize the new loop info to the current one, in case there // are no loop transformations done. @@ -3048,12 +3048,12 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, SmallVector TileSizes; for (auto tile : tiles.value()) { - llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile); + llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile); TileSizes.push_back(TileVal); } - std::vector NewLoops = - ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); + std::vector NewLoops = + ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); // Collapse loops. Store the insertion point because LoopInfos may get // invalidated. @@ -3064,7 +3064,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, // Update the loop infos loopInfos.clear(); - for (const auto& newLoop : NewLoops) { + for (const auto &newLoop : NewLoops) { loopInfos.push_back(newLoop); } } // Tiling done From 822fd824e3cd71684544fdcfab1a2c73d3736333 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 14 Jun 2025 06:29:58 -0400 Subject: [PATCH 07/64] Fix formatting --- flang/lib/Semantics/canonicalize-omp.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index c519cb43628ed..1d00bdaad777c 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -166,10 +166,10 @@ class CanonicalizationOfOmp { // Retrieveing the address so that DoConstruct or inner loop can be // set later. 
loops.push(&(std::get>>( + common::Indirection>>( loops.top()->t) - .value() - .value())); + .value() + .value())); nextIt = block.erase(nextIt); } } From 48bcab418f34da713cd3234c6f0dbf177da78495 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 19 Jun 2025 15:52:55 -0400 Subject: [PATCH 08/64] Fix test. --- flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 | 1 + 1 file changed, 1 insertion(+) diff --git a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 index bb1929249183b..355626f6e73b9 100644 --- a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 +++ b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 @@ -1,6 +1,7 @@ !RUN: %python %S/../test_errors.py %s %flang -fopenmp integer :: i, j +! ERROR: DO CONCURRENT loops cannot be used with the COLLAPSE clause. !$omp parallel do collapse(2) do i = 1, 1 ! ERROR: DO CONCURRENT loops cannot form part of a loop nest. From 90320a91c64e454171204143fcb999a1992a2bf8 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Fri, 20 Jun 2025 07:31:02 -0400 Subject: [PATCH 09/64] Add more mlir tests. Set collapse value when lowering from SCF to OpenMP. --- .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 4 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 12 +++++ .../Conversion/SCFToOpenMP/scf-to-openmp.mlir | 2 +- mlir/test/Dialect/OpenMP/invalid.mlir | 23 ++++++++ mlir/test/Dialect/OpenMP/ops.mlir | 54 +++++++++++++++++++ 5 files changed, 92 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index bec17258d058f..f056e72531bfc 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -493,8 +493,8 @@ struct ParallelOpLowering : public OpRewritePattern { // Create loop nest and populate region with contents of scf.parallel. 
auto loopOp = omp::LoopNestOp::create( rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(), - parallelOp.getUpperBound(), parallelOp.getStep(), false, 1, - nullptr); + parallelOp.getUpperBound(), parallelOp.getStep(), false, + parallelOp.getLowerBound().size(), nullptr); rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(), loopOp.getRegion().begin()); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 3c65ddadff019..19bff545fb202 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -3058,6 +3058,18 @@ LogicalResult LoopNestOp::verify() { << "range argument type does not match corresponding IV type"; } + uint64_t numIVs = getIVs().size(); + + if (const auto &numCollapse = getNumCollapse()) + if (numCollapse > numIVs) + return emitOpError() + << "collapse value is larger than the number of loops"; + + if (const auto &tiles = getTileSizes()) + if (tiles.value().size() > numIVs) + return emitOpError() + << "number of tilings is larger than the number of loops"; + if (!llvm::dyn_cast_if_present((*this)->getParentOp())) return emitOpError() << "expects parent op to be a loop wrapper"; diff --git a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir index a722acbf2c347..d362bb6092419 100644 --- a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir +++ b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir @@ -6,7 +6,7 @@ func.func @parallel(%arg0: index, %arg1: index, %arg2: index, // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { // CHECK: omp.wsloop { - // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { + // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) collapse(2) { // CHECK: 
memref.alloca_scope scf.parallel (%i, %j) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { // CHECK: "test.payload"(%[[LVAR1]], %[[LVAR2]]) : (index, index) -> () diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 5088f2dfa7d7a..c6b4ae02602d9 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -157,6 +157,29 @@ func.func @no_loops(%lb : index, %ub : index, %step : index) { } } +// ----- + +func.func @collapse_size(%lb : index, %ub : index, %step : index) { + omp.wsloop { + // expected-error@+1 {{collapse value is larger than the number of loops}} + omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) collapse(4) { + omp.yield + } + } +} + +// ----- + +func.func @tiles_length(%lb : index, %ub : index, %step : index) { + omp.wsloop { + // expected-error@+1 {{number of tilings is larger than the number of loops}} + omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) tiles(2, 4) { + omp.yield + } + } +} + + // ----- func.func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) { diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 8c846cde1a3ca..e627a86e69185 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -376,6 +376,60 @@ func.func @omp_loop_nest_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32, return } +// CHECK-LABEL: omp_loop_nest_pretty_multiple_collapse +func.func @omp_loop_nest_pretty_multiple_collapse(%lb1 : i32, %ub1 : i32, %step1 : i32, + %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref) -> () { + + omp.wsloop { + // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) collapse(2) + omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { + %1 = "test.payload"(%iv1) : (i32) -> (index) + %2 = "test.payload"(%iv2) : (i32) -> (index) + memref.store 
%iv1, %data1[%1] : memref + memref.store %iv2, %data1[%2] : memref + omp.yield + } + } + + return +} + +// CHECK-LABEL: omp_loop_nest_pretty_multiple_tiles +func.func @omp_loop_nest_pretty_multiple_tiles(%lb1 : i32, %ub1 : i32, %step1 : i32, + %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref) -> () { + + omp.wsloop { + // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) tiles(5, 10) + omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) tiles(5, 10) { + %1 = "test.payload"(%iv1) : (i32) -> (index) + %2 = "test.payload"(%iv2) : (i32) -> (index) + memref.store %iv1, %data1[%1] : memref + memref.store %iv2, %data1[%2] : memref + omp.yield + } + } + + return +} + +// CHECK-LABEL: omp_loop_nest_pretty_multiple_collapse_tiles +func.func @omp_loop_nest_pretty_multiple_collapse_tiles(%lb1 : i32, %ub1 : i32, %step1 : i32, + %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref) -> () { + + omp.wsloop { + // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) collapse(2) tiles(5, 10) + omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) tiles(5, 10) { + %1 = "test.payload"(%iv1) : (i32) -> (index) + %2 = "test.payload"(%iv2) : (i32) -> (index) + memref.store %iv1, %data1[%1] : memref + memref.store %iv2, %data1[%2] : memref + omp.yield + } + } + + return +} + // CHECK-LABEL: omp_wsloop func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref, %linear_var : i32, %chunk_var : i32) -> () { From d8a976f4c7c9fb426c8e664e0b5962cfe036198b Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Fri, 20 Jun 2025 07:52:28 -0400 Subject: [PATCH 10/64] Use llvm::SmallVector instead of std::stack --- flang/lib/Semantics/canonicalize-omp.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git 
a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 1d00bdaad777c..5264ec25fd80c 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -10,8 +10,6 @@ #include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/semantics.h" - -#include // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP // Constructs more structured which provide explicit scopes for later // structural checks and semantic analysis. @@ -150,8 +148,8 @@ class CanonicalizationOfOmp { if (GetConstructIf(*nextIt)) continue; // Keep track of the loops to handle the end loop directives - std::stack loops; - loops.push(&x); + llvm::SmallVector loops; + loops.push_back(&x); if (auto *innerConstruct{ GetConstructIf(*nextIt)}) { if (auto *innerOmpLoop{ @@ -162,12 +160,12 @@ class CanonicalizationOfOmp { if (innerDir.v == llvm::omp::Directive::OMPD_tile) { std::get>>( - loops.top()->t) = std::move(*innerOmpLoop); + loops.back()->t) = std::move(*innerOmpLoop); // Retrieveing the address so that DoConstruct or inner loop can be // set later. - loops.push(&(std::get>>( - loops.top()->t) + loops.back()->t) .value() .value())); nextIt = block.erase(nextIt); @@ -180,16 +178,16 @@ class CanonicalizationOfOmp { // move DoConstruct std::get>>>( - loops.top()->t) = std::move(*doCons); + loops.back()->t) = std::move(*doCons); nextIt = block.erase(nextIt); // try to match OmpEndLoopDirective while (nextIt != block.end() && !loops.empty()) { if (auto *endDir{ GetConstructIf(*nextIt)}) { std::get>( - loops.top()->t) = std::move(*endDir); + loops.back()->t) = std::move(*endDir); nextIt = block.erase(nextIt); - loops.pop(); + loops.pop_back(); } else { // If there is a mismatch bail out. 
break; From 1d133e9b8158fd36e6e99cd9eeb8a9d37d4753b1 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 21 Jun 2025 09:11:58 -0400 Subject: [PATCH 11/64] Improve test a bit to make sure IVs are used as expected. --- flang/test/Lower/OpenMP/wsloop-tile.f90 | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90 index f43b558ce46bb..c9bf18e3b278d 100644 --- a/flang/test/Lower/OpenMP/wsloop-tile.f90 +++ b/flang/test/Lower/OpenMP/wsloop-tile.f90 @@ -13,7 +13,7 @@ program wsloop_tile c=50 x=0 - !CHECK: omp.loop_nest + !CHECK: omp.loop_nest (%[[IV_0:.*]], %[[IV_1:.*]], %[[IV_2:.*]]) : i32 !CHECK-SAME: tiles(2, 5, 10) !$omp do @@ -21,6 +21,15 @@ program wsloop_tile do i = 1, a do j= 1, b do k = 1, c + !CHECK: hlfir.assign %[[IV_0]] to %[[IV_0A:.*]] : i32 + !CHECK: hlfir.assign %[[IV_1]] to %[[IV_1A:.*]] : i32 + !CHECK: hlfir.assign %[[IV_2]] to %[[IV_2A:.*]] : i32 + !CHECK: %[[IVV_0:.*]] = fir.load %[[IV_0A]] + !CHECK: %[[SUM0:.*]] = arith.addi %{{.*}}, %[[IVV_0]] : i32 + !CHECK: %[[IVV_1:.*]] = fir.load %[[IV_1A]] + !CHECK: %[[SUM1:.*]] = arith.addi %[[SUM0]], %[[IVV_1]] : i32 + !CHECK: %[[IVV_2:.*]] = fir.load %[[IV_2A]] + !CHECK: %[[SUM2:.*]] = arith.addi %[[SUM1]], %[[IVV_2]] : i32 x = x + i + j + k end do end do From 04e73de83538311995c59e016930b29de9c95735 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 21 Jun 2025 09:30:57 -0400 Subject: [PATCH 12/64] Fix comments to clarify canonicalization. 
--- flang/lib/Semantics/canonicalize-omp.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 5264ec25fd80c..d5b5b14d22dc2 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -119,13 +119,15 @@ class CanonicalizationOfOmp { // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> // OmpBeginLoopDirective t-> OmpLoopDirective // [ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct u-> - /// OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile] + // OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile] // ExecutableConstruct -> DoConstruct - // [ExecutableConstruct -> OmpEndLoopDirective] + // [ExecutableConstruct -> OmpEndLoopDirective] (note: tile) // ExecutableConstruct -> OmpEndLoopDirective (if available) // // After rewriting: // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> + // [OpenMPLoopConstruct t -> OmpBeginLoopDirective -> OmpLoopDirective + // OmpEndLoopDirective] (note: tile) // OmpBeginLoopDirective t -> OmpLoopDirective -> DoConstruct // OmpEndLoopDirective (if available) parser::Block::iterator nextIt; From e46d100c2724f785aa4910518902172c434a6087 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 21 Jun 2025 12:05:40 -0400 Subject: [PATCH 13/64] Special handling of tile directive when dealing with start end end loop directives. 
--- flang/lib/Semantics/canonicalize-omp.cpp | 31 ++++++++++++++++-------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index d5b5b14d22dc2..a7749b5a81678 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -160,16 +160,13 @@ class CanonicalizationOfOmp { std::get(innerOmpLoop->t)}; auto &innerDir{std::get(innerBeginDir.t)}; if (innerDir.v == llvm::omp::Directive::OMPD_tile) { - std::get>>( - loops.back()->t) = std::move(*innerOmpLoop); + loops.back()->t); + innerLoop = std::move(*innerOmpLoop); // Retrieveing the address so that DoConstruct or inner loop can be // set later. - loops.push_back(&(std::get>>( - loops.back()->t) - .value() - .value())); + loops.push_back(&(innerLoop.value().value())); nextIt = block.erase(nextIt); } } @@ -186,9 +183,23 @@ class CanonicalizationOfOmp { while (nextIt != block.end() && !loops.empty()) { if (auto *endDir{ GetConstructIf(*nextIt)}) { - std::get>( - loops.back()->t) = std::move(*endDir); - nextIt = block.erase(nextIt); + auto &endOmpDirective{ + std::get(endDir->t)}; + auto &loopBegin{ + std::get(loops.back()->t)}; + auto &loopDir{std::get(loopBegin.t)}; + + // If the directive is a tile we try to match the corresponding + // end tile if it exsists. If it is not a tile directive we + // always assign the end loop directive and fall back on the + // existing directive structure checks. + if (loopDir.v != llvm::omp::Directive::OMPD_tile || + loopDir.v == endOmpDirective.v) { + std::get>( + loops.back()->t) = std::move(*endDir); + nextIt = block.erase(nextIt); + } + loops.pop_back(); } else { // If there is a mismatch bail out. From 9cacf3c0d04d5d8d5d5605f71def51da5d55b24b Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 21 Jun 2025 12:20:33 -0400 Subject: [PATCH 14/64] Inline functions. 
--- flang/lib/Semantics/resolve-directives.cpp | 62 +++++++++------------- 1 file changed, 24 insertions(+), 38 deletions(-) diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 0e87dcfdbbbb8..934a06b2aef33 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -828,14 +828,6 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { void CollectAssociatedLoopLevelsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); - template - void CollectAssociatedLoopLevelFromClauseValue( - const parser::OmpClause &clause, llvm::SmallVector &, - llvm::SmallVector &); - template - void CollectAssociatedLoopLevelFromClauseSize(const parser::OmpClause &, - llvm::SmallVector &, - llvm::SmallVector &); void CollectAssociatedLoopLevelsFromClauses(const parser::OmpClauseList &, llvm::SmallVector &, llvm::SmallVector &); @@ -2078,40 +2070,34 @@ void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct( } } -template -void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseValue( - const parser::OmpClause &clause, llvm::SmallVector &levels, - llvm::SmallVector &clauses) { - if (const auto tclause{std::get_if(&clause.u)}) { - std::int64_t level = 0; - if (const auto v{EvaluateInt64(context_, tclause->v)}) { - level = *v; - } - levels.push_back(level); - clauses.push_back(&clause); - } -} - -template -void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseSize( - const parser::OmpClause &clause, llvm::SmallVector &levels, - llvm::SmallVector &clauses) { - if (const auto tclause{std::get_if(&clause.u)}) { - levels.push_back(tclause->v.size()); - clauses.push_back(&clause); - } -} - void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromClauses( const parser::OmpClauseList &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { for (const auto &clause : x.v) { - 
CollectAssociatedLoopLevelFromClauseValue( - clause, levels, clauses); - CollectAssociatedLoopLevelFromClauseValue( - clause, levels, clauses); - CollectAssociatedLoopLevelFromClauseSize( - clause, levels, clauses); + if (const auto oclause{ + std::get_if(&clause.u)}) { + std::int64_t level = 0; + if (const auto v{EvaluateInt64(context_, oclause->v)}) { + level = *v; + } + levels.push_back(level); + clauses.push_back(&clause); + } + + if (const auto cclause{ + std::get_if(&clause.u)}) { + std::int64_t level = 0; + if (const auto v{EvaluateInt64(context_, cclause->v)}) { + level = *v; + } + levels.push_back(level); + clauses.push_back(&clause); + } + + if (const auto tclause{std::get_if(&clause.u)}) { + levels.push_back(tclause->v.size()); + clauses.push_back(&clause); + } } } From 279ee72fcd9dde83ff44a2c001864f735f4cfa70 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Mon, 23 Jun 2025 11:03:50 -0400 Subject: [PATCH 15/64] Remove debug code. --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 7ad1e70cb6e75..bbd1ed6a3ab2d 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -810,11 +810,7 @@ void CodeExtractor::severSplitPHINodesOfExits() { } void CodeExtractor::splitReturnBlocks() { - for (BasicBlock *Block : Blocks) { - if (!Block->getTerminator()) { - errs() << "====== No terminator in block: " << Block->getName() - << "======\n"; - } + for (BasicBlock *Block : Blocks) if (ReturnInst *RI = dyn_cast(Block->getTerminator())) { BasicBlock *New = Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); @@ -831,7 +827,6 @@ void CodeExtractor::splitReturnBlocks() { DT->changeImmediateDominator(I, NewNode); } } - } } Function *CodeExtractor::constructFunctionDeclaration( From ace5268bc9f65953607c703cbd9ea36ef05e63c5 Mon Sep 17 00:00:00 
2001 From: Jan Leyonberg Date: Mon, 23 Jun 2025 11:08:42 -0400 Subject: [PATCH 16/64] Reuse loop op lowering, add comment. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index a19283286df41..7de79d7333f25 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3525,7 +3525,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_tile: newOp = - genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); + genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); From 57b37f053079696f884f791247382ee15b383750 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Mon, 23 Jun 2025 11:12:51 -0400 Subject: [PATCH 17/64] Fix formatting. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 7de79d7333f25..3d5162551778d 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3524,8 +3524,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, item); break; case llvm::omp::Directive::OMPD_tile: - newOp = - genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); + newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); From 9ac5ccea3ca8995f95711058fb3959d293bc4594 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Mon, 23 Jun 2025 11:21:03 -0400 Subject: [PATCH 18/64] Remove curly braces. 
--- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 19bff545fb202..bac07e1ac17d5 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -2978,11 +2978,10 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { (parser.parseLParen() || parser.parseInteger(value) || parser.parseRParen())) return failure(); - if (value > 1) { + if (value > 1) result.addAttribute( "num_collapse", IntegerAttr::get(parser.getBuilder().getI64Type(), value)); - } // Parse tiles SmallVector tiles; @@ -2999,9 +2998,8 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { parser.parseRParen())) return failure(); - if (tiles.size() > 0) { + if (tiles.size() > 0) result.addAttribute("tile_sizes", DenseI64ArrayAttr::get(ctx, tiles)); - } // Parse the body. Region *region = result.addRegion(); @@ -3026,13 +3024,13 @@ void LoopNestOp::print(OpAsmPrinter &p) { if (getLoopInclusive()) p << "inclusive "; p << "step (" << getLoopSteps() << ") "; - if (int64_t numCollapse = getNumCollapse()) { + if (int64_t numCollapse = getNumCollapse()) if (numCollapse > 1) p << "collapse(" << numCollapse << ") "; - } - if (const auto tiles = getTileSizes()) { + + if (const auto tiles = getTileSizes()) p << "tiles(" << tiles.value() << ") "; - } + p.printRegion(region, /*printEntryBlockArgs=*/false); } From 7447e373e13276f053515dd92d1e22cca010f2df Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 25 Jun 2025 10:11:36 -0400 Subject: [PATCH 19/64] Avoid attaching the sizes clause to the parent construct, instead find the tile sizes through the parse tree when getting the information needed to create the loop nest ops. 
--- flang/lib/Lower/OpenMP/OpenMP.cpp | 23 ++--- flang/lib/Lower/OpenMP/Utils.cpp | 90 ++++++++++++++++++- flang/lib/Lower/OpenMP/Utils.h | 5 ++ .../Frontend/OpenMP/ConstructDecompositionT.h | 4 - 4 files changed, 99 insertions(+), 23 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 3d5162551778d..646c515280c76 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -47,6 +47,7 @@ using namespace Fortran::lower::omp; using namespace Fortran::common::openmp; +using namespace Fortran::semantics; //===----------------------------------------------------------------------===// // Code generation helper functions @@ -1690,6 +1691,7 @@ genLoopNestClauses(lower::AbstractConverter &converter, int64_t collapseValue = evaluate::ToInt64(collapse.v).value(); clauseOps.numCollapse = firOpBuilder.getI64IntegerAttr(collapseValue); } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { + // This case handles the stand-alone tiling construct const auto &sizes = std::get(clause.u); llvm::SmallVector sizeValues; for (auto &size : sizes.v) { @@ -1699,6 +1701,12 @@ genLoopNestClauses(lower::AbstractConverter &converter, clauseOps.tileSizes = sizeValues; } } + + llvm::SmallVector sizeValues; + auto *ompCons{eval.getIf()}; + collectTileSizesFromOpenMPConstruct (ompCons, sizeValues, semaCtx); + if (sizeValues.size() > 0) + clauseOps.tileSizes = sizeValues; } static void genLoopClauses( @@ -3968,21 +3976,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, List clauses = makeClauses( std::get(beginLoopDirective.t), semaCtx); - const auto &innerOptional = - std::get>>( - loopConstruct.t); - if (innerOptional.has_value()) { - const auto &innerLoopDirective = innerOptional.value().value(); - const auto &innerBegin = - std::get(innerLoopDirective.t); - const auto &innerDirective = - std::get(innerBegin.t); - if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { - 
clauses.append( - makeClauses(std::get(innerBegin.t), semaCtx)); - } - } - if (auto &endLoopDirective = std::get>( loopConstruct.t)) { diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 69d74762ace6f..e7fa9063b7ae2 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -14,6 +14,7 @@ #include "ClauseFinder.h" #include "flang/Lower/OpenMP/Clauses.h" +#include "flang/Evaluate/fold.h" #include #include #include @@ -24,10 +25,31 @@ #include #include #include +#include #include #include +using namespace Fortran::semantics; + +template +MaybeIntExpr +EvaluateIntExpr(SemanticsContext &context, const T &expr) { + if (MaybeExpr maybeExpr{ + Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { + if (auto *intExpr{Fortran::evaluate::UnwrapExpr(*maybeExpr)}) { + return std::move(*intExpr); + } + } + return std::nullopt; +} + +template +std::optional +EvaluateInt64(SemanticsContext &context, const T &expr) { + return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); +} + llvm::cl::opt treatIndexAsSection( "openmp-treat-index-as-section", llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."), @@ -615,6 +637,43 @@ static void convertLoopBounds(lower::AbstractConverter &converter, } } +// Populates the sizes vector with values if the given OpenMPConstruct +// Contains a loop construct with an inner tiling construct. 
+void collectTileSizesFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &tileSizes, + SemanticsContext &semaCtx) { + if (!ompCons) + return; + + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + const auto &innerOptional = std::get< + std::optional>>( + ompLoop->t); + if (innerOptional.has_value()) { + const auto &innerLoopDirective = innerOptional.value().value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t).v; + + if (innerDirective == llvm::omp::Directive::OMPD_tile) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{ + std::get(innerBegin.t)}; + for (const auto &clause : innerClauseList.v) + if (const auto tclause{ + std::get_if(&clause.u)}) { + for (auto &tval : tclause->v) { + if (const auto v{EvaluateInt64(semaCtx, tval)}) + tileSizes.push_back(*v); + } + } + } + } + } +} + bool collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, @@ -636,11 +695,34 @@ bool collectLoopRelatedInfo( collapseValue = evaluate::ToInt64(clause->v).value(); found = true; } + + // Collect sizes from tile directive if present std::int64_t sizesLengthValue = 0l; - if (auto *clause = - ClauseFinder::findUniqueClause(clauses)) { - sizesLengthValue = clause->v.size(); - found = true; + if (auto *ompCons{eval.getIf()}) { + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + const auto &innerOptional = std::get< + std::optional>>( + ompLoop->t); + if (innerOptional.has_value()) { + const auto &innerLoopDirective = innerOptional.value().value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t).v; + + if (innerDirective == llvm::omp::Directive::OMPD_tile) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{ + std::get(innerBegin.t)}; + 
for (const auto &clause : innerClauseList.v) + if (const auto tclause{ + std::get_if(&clause.u)}) { + sizesLengthValue = tclause->v.size(); + found = true; + } + } + } + } } collapseValue = collapseValue - sizesLengthValue; diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 60f44a7f0610c..bb42fb02efc09 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -175,6 +175,11 @@ bool collectLoopRelatedInfo( mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv); +void collectTileSizesFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &tileSizes, + Fortran::semantics::SemanticsContext &semaCtx); + } // namespace omp } // namespace lower } // namespace Fortran diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index e1083b7ef2cd9..5bb1f3f36b65e 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -497,10 +497,6 @@ bool ConstructDecompositionT::applyClause( if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) { last.clauses.push_back(node); return true; - } else { - // llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n"; - last.clauses.push_back(node); - return true; } } From 7c7b6f101135d4c1768287f763bf6860533997ec Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 25 Jun 2025 10:33:33 -0400 Subject: [PATCH 20/64] Fix formatting --- flang/lib/Lower/OpenMP/OpenMP.cpp | 2 +- flang/lib/Lower/OpenMP/Utils.cpp | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 646c515280c76..4311c85eeb7d4 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1704,7 +1704,7 @@ genLoopNestClauses(lower::AbstractConverter &converter, 
llvm::SmallVector sizeValues; auto *ompCons{eval.getIf()}; - collectTileSizesFromOpenMPConstruct (ompCons, sizeValues, semaCtx); + collectTileSizesFromOpenMPConstruct(ompCons, sizeValues, semaCtx); if (sizeValues.size() > 0) clauseOps.tileSizes = sizeValues; } diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index e7fa9063b7ae2..e04a6eae98408 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -33,8 +33,7 @@ using namespace Fortran::semantics; template -MaybeIntExpr -EvaluateIntExpr(SemanticsContext &context, const T &expr) { +MaybeIntExpr EvaluateIntExpr(SemanticsContext &context, const T &expr) { if (MaybeExpr maybeExpr{ Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { if (auto *intExpr{Fortran::evaluate::UnwrapExpr(*maybeExpr)}) { @@ -45,8 +44,8 @@ EvaluateIntExpr(SemanticsContext &context, const T &expr) { } template -std::optional -EvaluateInt64(SemanticsContext &context, const T &expr) { +std::optional EvaluateInt64(SemanticsContext &context, + const T &expr) { return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); } @@ -641,8 +640,7 @@ static void convertLoopBounds(lower::AbstractConverter &converter, // Contains a loop construct with an inner tiling construct. void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &tileSizes, - SemanticsContext &semaCtx) { + llvm::SmallVectorImpl &tileSizes, SemanticsContext &semaCtx) { if (!ompCons) return; From 47b75c3947c82294cf467bdd61a86bf2e787c147 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 26 Jun 2025 10:03:13 -0400 Subject: [PATCH 21/64] Fix unparse and add a test for nested loop constructs. 
--- flang/test/Parser/OpenMP/do-tile-size.f90 | 29 +++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 flang/test/Parser/OpenMP/do-tile-size.f90 diff --git a/flang/test/Parser/OpenMP/do-tile-size.f90 b/flang/test/Parser/OpenMP/do-tile-size.f90 new file mode 100644 index 0000000000000..886ee4a2a680c --- /dev/null +++ b/flang/test/Parser/OpenMP/do-tile-size.f90 @@ -0,0 +1,29 @@ +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s + +subroutine openmp_do_tiles(x) + + integer, intent(inout)::x + + +!CHECK: !$omp do +!CHECK: !$omp tile sizes +!$omp do +!$omp tile sizes(2) +!CHECK: do + do x = 1, 100 + call F1() +!CHECK: end do + end do +!CHECK: !$omp end tile +!$omp end tile +!$omp end do + +!PARSE-TREE:| | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!PARSE-TREE:| | | OmpBeginLoopDirective +!PARSE-TREE:| | | OpenMPLoopConstruct +!PARSE-TREE:| | | | OmpBeginLoopDirective +!PARSE-TREE:| | | | | OmpLoopDirective -> llvm::omp::Directive = tile +!PARSE-TREE:| | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4' +!PARSE-TREE: | | | | DoConstruct +END subroutine openmp_do_tiles From 2203a350d8605baceb889e48de703f7528a3ceac Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 26 Jun 2025 10:50:08 -0400 Subject: [PATCH 22/64] Use more convenient function to get OpenMPLoopConstruct. Fix comments. 
--- flang/lib/Semantics/canonicalize-omp.cpp | 30 ++++++++----------- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 9 +++--- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index a7749b5a81678..79630b564e51a 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -152,23 +152,19 @@ class CanonicalizationOfOmp { // Keep track of the loops to handle the end loop directives llvm::SmallVector loops; loops.push_back(&x); - if (auto *innerConstruct{ - GetConstructIf(*nextIt)}) { - if (auto *innerOmpLoop{ - std::get_if(&innerConstruct->u)}) { - auto &innerBeginDir{ - std::get(innerOmpLoop->t)}; - auto &innerDir{std::get(innerBeginDir.t)}; - if (innerDir.v == llvm::omp::Directive::OMPD_tile) { - auto &innerLoop = std::get>>( - loops.back()->t); - innerLoop = std::move(*innerOmpLoop); - // Retrieveing the address so that DoConstruct or inner loop can be - // set later. - loops.push_back(&(innerLoop.value().value())); - nextIt = block.erase(nextIt); - } + if (auto *innerOmpLoop{GetOmpIf(*nextIt)}) { + auto &innerBeginDir{ + std::get(innerOmpLoop->t)}; + auto &innerDir{std::get(innerBeginDir.t)}; + if (innerDir.v == llvm::omp::Directive::OMPD_tile) { + auto &innerLoop = std::get< + std::optional>>( + loops.back()->t); + innerLoop = std::move(*innerOmpLoop); + // Retrieveing the address so that DoConstruct or inner loop can be + // set later. 
+ loops.push_back(&(innerLoop.value().value())); + nextIt = block.erase(nextIt); } } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index dccf241e919a7..842fdc0854d67 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3034,7 +3034,6 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, loopInfos.push_back(*loopResult); } - // llvm::OpenMPIRBuilder::InsertPointTy afterIP = builder.saveIP(); llvm::OpenMPIRBuilder::InsertPointTy afterIP = loopInfos.front()->getAfterIP(); @@ -3055,10 +3054,10 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, std::vector NewLoops = ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); - // Collapse loops. Store the insertion point because LoopInfos may get - // invalidated. - auto AfterBB = NewLoops.front()->getAfter(); - auto AfterAfterBB = AfterBB->getSingleSuccessor(); + // Update afterIP to get the correct insertion point after + // tiling. + llvm::BasicBlock *AfterBB = NewLoops.front()->getAfter(); + llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor(); afterIP = {AfterAfterBB, AfterAfterBB->begin()}; NewTopLoopInfo = NewLoops[0]; From 99cb790de98302ff254c699e2bc5e5db51497e13 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 26 Jun 2025 10:54:46 -0400 Subject: [PATCH 23/64] Fix formatting. 
--- .../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 842fdc0854d67..183964372cadf 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3057,7 +3057,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, // Update afterIP to get the correct insertion point after // tiling. llvm::BasicBlock *AfterBB = NewLoops.front()->getAfter(); - llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor(); + llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor(); afterIP = {AfterAfterBB, AfterAfterBB->begin()}; NewTopLoopInfo = NewLoops[0]; From 1ff74efa64434d7d98868bb5f3a3df5aff62c781 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 9 Aug 2025 10:53:23 -0400 Subject: [PATCH 24/64] Fix merge problems related to the different representations used for nested loop constructs. 
--- flang/lib/Lower/OpenMP/OpenMP.cpp | 52 +++++----------------- flang/lib/Lower/OpenMP/Utils.cpp | 28 +++++++----- flang/lib/Semantics/canonicalize-omp.cpp | 10 +++-- flang/lib/Semantics/resolve-directives.cpp | 34 ++++++++------ 4 files changed, 56 insertions(+), 68 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 4311c85eeb7d4..e6a8ac1e3b1f3 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -422,14 +422,19 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, // FIXME(JAN): For now we check if there is an inner // OpenMPLoopConstruct, and extract the size clause from there - const auto &innerOptional = std::get>>( - ompConstruct.t); - if (innerOptional.has_value()) { - const auto &innerLoopDirective = innerOptional.value().value(); + const auto &nestedOptional = + std::get>( + ompConstruct.t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopConstruct = innerConstruct->value(); const auto &innerBegin = std::get( - innerLoopDirective.t); + innerLoopConstruct.t); const auto &innerDirective = std::get(innerBegin.t); if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { @@ -2276,41 +2281,6 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, // Apply unrolling to it auto cli = canonLoop.getCli(); mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); - -static mlir::omp::LoopOp genTiledLoopOp(lower::AbstractConverter &converter, - lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, - mlir::Location loc, - const ConstructQueue &queue, - ConstructQueue::const_iterator item) { - mlir::omp::LoopOperands loopClauseOps; - llvm::SmallVector loopReductionSyms; - genLoopClauses(converter, semaCtx, item->clauses, loc, 
loopClauseOps, - loopReductionSyms); - - DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, - /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/true, symTable); - dsp.processStep1(&loopClauseOps); - - mlir::omp::LoopNestOperands loopNestClauseOps; - llvm::SmallVector iv; - genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, - loopNestClauseOps, iv); - - EntryBlockArgs loopArgs; - loopArgs.priv.syms = dsp.getDelayedPrivSymbols(); - loopArgs.priv.vars = loopClauseOps.privateVars; - loopArgs.reduction.syms = loopReductionSyms; - loopArgs.reduction.vars = loopClauseOps.reductionVars; - - auto loopOp = - genWrapperOp(converter, loc, loopClauseOps, loopArgs); - genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, - loopNestClauseOps, iv, {{loopOp, loopArgs}}, - llvm::omp::Directive::OMPD_loop, dsp); - return loopOp; } static mlir::omp::MaskedOp diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index e04a6eae98408..dc58eecae7759 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -645,11 +645,15 @@ void collectTileSizesFromOpenMPConstruct( return; if (auto *ompLoop{std::get_if(&ompCons->u)}) { - const auto &innerOptional = std::get< - std::optional>>( - ompLoop->t); - if (innerOptional.has_value()) { - const auto &innerLoopDirective = innerOptional.value().value(); + const auto &nestedOptional = + std::get>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopDirective = innerConstruct->value(); const auto &innerBegin = std::get(innerLoopDirective.t); const auto &innerDirective = @@ -698,11 +702,15 @@ bool collectLoopRelatedInfo( std::int64_t sizesLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { if (auto *ompLoop{std::get_if(&ompCons->u)}) { - const auto 
&innerOptional = std::get< - std::optional>>( - ompLoop->t); - if (innerOptional.has_value()) { - const auto &innerLoopDirective = innerOptional.value().value(); + const auto &nestedOptional = + std::get>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopDirective = innerConstruct->value(); const auto &innerBegin = std::get(innerLoopDirective.t); const auto &innerDirective = diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 79630b564e51a..4792bf2cb217c 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -157,13 +157,15 @@ class CanonicalizationOfOmp { std::get(innerOmpLoop->t)}; auto &innerDir{std::get(innerBeginDir.t)}; if (innerDir.v == llvm::omp::Directive::OMPD_tile) { - auto &innerLoop = std::get< - std::optional>>( - loops.back()->t); + auto &innerLoopVariant = + std::get>(loops.back()->t); + auto &innerLoop = + std::get>( + innerLoopVariant.value()); innerLoop = std::move(*innerOmpLoop); // Retrieveing the address so that DoConstruct or inner loop can be // set later. 
- loops.push_back(&(innerLoop.value().value())); + loops.push_back(&(innerLoop.value())); nextIt = block.erase(nextIt); } } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 934a06b2aef33..9a2caef34bd67 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2061,12 +2061,18 @@ void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { - const auto &innerOptional = - std::get>>( - x.t); - if (innerOptional.has_value()) { + + const auto &nestedOptional = + std::get>(x.t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + + if (innerConstruct) { CollectAssociatedLoopLevelsFromLoopConstruct( - innerOptional.value().value(), levels, clauses); + innerConstruct->value(), levels, clauses); } } @@ -2131,17 +2137,19 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( bool hasCollapseClause{ clause ? 
(clause->Id() == llvm::omp::OMPC_collapse) : false}; const parser::OpenMPLoopConstruct *innerMostLoop = &x; - + const parser::NestedConstruct *innerMostNest = nullptr; while (auto &optLoopCons{ - std::get>(x.t)}) { - if (const auto &innerLoop{ - std::get_if < parser::OpenMPLoopConstruct >>> (innerMostLoop->t)}) { - innerMostLoop = &innerLoop.value().value(); + std::get>(innerMostLoop->t)}) { + innerMostNest = &(optLoopCons.value()); + if (const auto *innerLoop{ + std::get_if>( + innerMostNest)}) { + innerMostLoop = &(innerLoop->value()); } } - if (optLoopCons.has_value()) { - if (const auto &outer{std::get_if(innerMostLoop->t)}) { + if (innerMostNest) { + if (const auto &outer{std::get_if(innerMostNest)}) { for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; --level) { if (loop->IsDoConcurrent()) { @@ -2177,7 +2185,7 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( CheckAssocLoopLevel(level, GetAssociatedClause()); } else if (const auto &loop{std::get_if< common::Indirection>( - &*optLoopCons)}) { + innerMostNest)}) { auto &beginDirective = std::get(loop->value().t); auto &beginLoopDirective = From e49a3012d0178dd8bb3d601fa4bbf7bb67983645 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 9 Aug 2025 12:27:35 -0400 Subject: [PATCH 25/64] Fix bugs introduced when merging. 
--- flang/lib/Lower/OpenMP/OpenMP.cpp | 4 ++-- flang/lib/Semantics/canonicalize-omp.cpp | 19 +++++++++--------- flang/lib/Semantics/resolve-directives.cpp | 3 ++- ...nested-loop-transformation-construct01.f90 | 20 ------------------- flang/test/Lower/OpenMP/wsloop-tile.f90 | 2 +- 5 files changed, 15 insertions(+), 33 deletions(-) delete mode 100644 flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index e6a8ac1e3b1f3..108d9a40c8d45 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -420,8 +420,8 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, beginClauseList = &std::get(beginDirective.t); - // FIXME(JAN): For now we check if there is an inner - // OpenMPLoopConstruct, and extract the size clause from there + // For now we check if there is an inner OpenMPLoopConstruct, and + // extract the size clause from there const auto &nestedOptional = std::get>( ompConstruct.t); diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 4792bf2cb217c..c664171350d9e 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -143,7 +143,6 @@ class CanonicalizationOfOmp { "If a loop construct has been fully unrolled, it cannot then be tiled"_err_en_US, parser::ToUpperCaseLetters(dir.source.ToString())); }; - nextIt = it; while (++nextIt != block.end()) { // Ignore compiler directives. @@ -159,14 +158,16 @@ class CanonicalizationOfOmp { if (innerDir.v == llvm::omp::Directive::OMPD_tile) { auto &innerLoopVariant = std::get>(loops.back()->t); - auto &innerLoop = - std::get>( - innerLoopVariant.value()); - innerLoop = std::move(*innerOmpLoop); - // Retrieveing the address so that DoConstruct or inner loop can be - // set later. 
- loops.push_back(&(innerLoop.value())); - nextIt = block.erase(nextIt); + if (innerLoopVariant.has_value()) { + auto *innerLoop = + std::get_if>( + &(innerLoopVariant.value())); + *innerLoop = std::move(*innerOmpLoop); + // Retrieveing the address so that DoConstruct or inner loop can be + // set later. + loops.push_back(&(innerLoop->value())); + nextIt = block.erase(nextIt); + } } } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 9a2caef34bd67..1694da571c55e 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2145,7 +2145,8 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( std::get_if>( innerMostNest)}) { innerMostLoop = &(innerLoop->value()); - } + } else + break; } if (innerMostNest) { diff --git a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 b/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 deleted file mode 100644 index 17eba93a7405d..0000000000000 --- a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 +++ /dev/null @@ -1,20 +0,0 @@ -! Test to ensure TODO message is emitted for tile OpenMP 5.1 Directives when they are nested. - -!RUN: not %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s - -subroutine loop_transformation_construct - implicit none - integer :: I = 10 - integer :: x - integer :: y(I) - - !$omp do - !$omp tile - do i = 1, I - y(i) = y(i) * 5 - end do - !$omp end tile - !$omp end do -end subroutine - -!CHECK: not yet implemented: Unhandled loop directive (tile) diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90 index c9bf18e3b278d..4c412b357f52e 100644 --- a/flang/test/Lower/OpenMP/wsloop-tile.f90 +++ b/flang/test/Lower/OpenMP/wsloop-tile.f90 @@ -2,7 +2,7 @@ ! 
RUN: bbc -fopenmp -fopenmp-version=51 -emit-hlfir %s -o - | FileCheck %s -!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "wsloop_tile"} { +!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "WSLOOP_TILE"} { program wsloop_tile integer :: i, j, k integer :: a, b, c From 9b770ab3caceb48b44221bce71ebaaa7a51114f0 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Mon, 11 Aug 2025 07:23:15 -0400 Subject: [PATCH 26/64] Move include --- flang/lib/Lower/OpenMP/Utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index dc58eecae7759..07f562fa6a4b1 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -13,8 +13,8 @@ #include "Utils.h" #include "ClauseFinder.h" -#include "flang/Lower/OpenMP/Clauses.h" #include "flang/Evaluate/fold.h" +#include "flang/Lower/OpenMP/Clauses.h" #include #include #include From d6ceeb0b1be2f4c0638e1430f90cfa2fbfbfffc1 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 19 Aug 2025 10:55:05 -0400 Subject: [PATCH 27/64] Remove unused code. Currently the canonicalize-omp can only handle a single nested loop construct, which is what we prefer. 
--- flang/lib/Lower/OpenMP/OpenMP.cpp | 1 - flang/lib/Lower/OpenMP/Utils.cpp | 22 ++---- flang/lib/Semantics/canonicalize-omp.cpp | 68 ++++--------------- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 2 - 4 files changed, 20 insertions(+), 73 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 108d9a40c8d45..5ec7cb632159a 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -937,7 +937,6 @@ static void genLoopVars( storeOp = createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); } - firOpBuilder.setInsertionPointAfter(storeOp); } diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 07f562fa6a4b1..6c9763e5a37ab 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -59,21 +59,14 @@ namespace lower { namespace omp { int64_t getCollapseValue(const List &clauses) { - int64_t collapseValue = 1; - int64_t numTileSizes = 0; - for (auto &clause : clauses) { - if (clause.id == llvm::omp::Clause::OMPC_collapse) { - const auto &collapse = std::get(clause.u); - collapseValue = evaluate::ToInt64(collapse.v).value(); - } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { - const auto &sizes = std::get(clause.u); - numTileSizes = sizes.v.size(); - } + auto iter = llvm::find_if(clauses, [](const Clause &clause) { + return clause.id == llvm::omp::Clause::OMPC_collapse; + }); + if (iter != clauses.end()) { + const auto &collapse = std::get(iter->u); + return evaluate::ToInt64(collapse.v).value(); } - - collapseValue = collapseValue - numTileSizes; - int64_t result = collapseValue > numTileSizes ? 
collapseValue : numTileSizes; - return result; + return 1; } void genObjectList(const ObjectList &objects, @@ -681,7 +674,6 @@ bool collectLoopRelatedInfo( lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { - bool found = false; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index c664171350d9e..9722eca19447d 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -10,6 +10,7 @@ #include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/semantics.h" + // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP // Constructs more structured which provide explicit scopes for later // structural checks and semantic analysis. @@ -116,19 +117,15 @@ class CanonicalizationOfOmp { // in the same iteration // // Original: - // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> - // OmpBeginLoopDirective t-> OmpLoopDirective - // [ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct u-> - // OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile] + // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct + // OmpBeginLoopDirective // ExecutableConstruct -> DoConstruct - // [ExecutableConstruct -> OmpEndLoopDirective] (note: tile) // ExecutableConstruct -> OmpEndLoopDirective (if available) // // After rewriting: - // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> - // [OpenMPLoopConstruct t -> OmpBeginLoopDirective -> OmpLoopDirective - // OmpEndLoopDirective] (note: tile) - // OmpBeginLoopDirective t -> OmpLoopDirective -> DoConstruct + // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct + // OmpBeginLoopDirective + // DoConstruct // OmpEndLoopDirective (if available) parser::Block::iterator nextIt; auto 
&beginDir{std::get(x.t)}; @@ -143,66 +140,27 @@ class CanonicalizationOfOmp { "If a loop construct has been fully unrolled, it cannot then be tiled"_err_en_US, parser::ToUpperCaseLetters(dir.source.ToString())); }; + nextIt = it; while (++nextIt != block.end()) { // Ignore compiler directives. if (GetConstructIf(*nextIt)) continue; - // Keep track of the loops to handle the end loop directives - llvm::SmallVector loops; - loops.push_back(&x); - if (auto *innerOmpLoop{GetOmpIf(*nextIt)}) { - auto &innerBeginDir{ - std::get(innerOmpLoop->t)}; - auto &innerDir{std::get(innerBeginDir.t)}; - if (innerDir.v == llvm::omp::Directive::OMPD_tile) { - auto &innerLoopVariant = - std::get>(loops.back()->t); - if (innerLoopVariant.has_value()) { - auto *innerLoop = - std::get_if>( - &(innerLoopVariant.value())); - *innerLoop = std::move(*innerOmpLoop); - // Retrieveing the address so that DoConstruct or inner loop can be - // set later. - loops.push_back(&(innerLoop->value())); - nextIt = block.erase(nextIt); - } - } - } if (auto *doCons{GetConstructIf(*nextIt)}) { if (doCons->GetLoopControl()) { // move DoConstruct std::get>>>( - loops.back()->t) = std::move(*doCons); + common::Indirection>>>(x.t) = + std::move(*doCons); nextIt = block.erase(nextIt); // try to match OmpEndLoopDirective - while (nextIt != block.end() && !loops.empty()) { + if (nextIt != block.end()) { if (auto *endDir{ GetConstructIf(*nextIt)}) { - auto &endOmpDirective{ - std::get(endDir->t)}; - auto &loopBegin{ - std::get(loops.back()->t)}; - auto &loopDir{std::get(loopBegin.t)}; - - // If the directive is a tile we try to match the corresponding - // end tile if it exsists. If it is not a tile directive we - // always assign the end loop directive and fall back on the - // existing directive structure checks. 
- if (loopDir.v != llvm::omp::Directive::OMPD_tile || - loopDir.v == endOmpDirective.v) { - std::get>( - loops.back()->t) = std::move(*endDir); - nextIt = block.erase(nextIt); - } - - loops.pop_back(); - } else { - // If there is a mismatch bail out. - break; + std::get>(x.t) = + std::move(*endDir); + nextIt = block.erase(nextIt); } } } else { diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 183964372cadf..03d3cc57895c7 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3037,8 +3037,6 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy afterIP = loopInfos.front()->getAfterIP(); - // Initialize the new loop info to the current one, in case there - // are no loop transformations done. llvm::CanonicalLoopInfo *NewTopLoopInfo = nullptr; // Do tiling From dd74eac78a3e153ee8434d5f6c341a277e7718bd Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 21 Aug 2025 22:15:45 -0400 Subject: [PATCH 28/64] Address review comments. 
--- flang/lib/Lower/OpenMP/OpenMP.cpp | 1 - flang/lib/Lower/OpenMP/Utils.cpp | 4 +- flang/lib/Semantics/resolve-directives.cpp | 54 +++++++++---------- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 9 ---- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 24 --------- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 5 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 3 +- mlir/test/Dialect/OpenMP/invalid.mlir | 2 +- 8 files changed, 33 insertions(+), 69 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 5ec7cb632159a..355ba9428995e 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3944,7 +3944,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, std::get(loopConstruct.t); List clauses = makeClauses( std::get(beginLoopDirective.t), semaCtx); - if (auto &endLoopDirective = std::get>( loopConstruct.t)) { diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 6c9763e5a37ab..2a6cb0d2d4a74 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -629,8 +629,8 @@ static void convertLoopBounds(lower::AbstractConverter &converter, } } -// Populates the sizes vector with values if the given OpenMPConstruct -// Contains a loop construct with an inner tiling construct. +/// Populates the sizes vector with values if the given OpenMPConstruct +/// Contains a loop construct with an inner tiling construct. 
void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, llvm::SmallVectorImpl &tileSizes, SemanticsContext &semaCtx) { diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 1694da571c55e..20df9d01de0c0 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -817,18 +817,22 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { const parser::OmpClause *GetAssociatedClause() { return associatedClause; } private: + /// Given a vector of loop levels and a vector of corresponding clauses find + /// the largest loop level and set the associated loop level to the found + /// maximum. This is used for error handling to ensure that the number of + /// affected loops is not larger that the number of available loops. std::int64_t SetAssociatedMaxClause(llvm::SmallVector &, llvm::SmallVector &); - std::int64_t GetAssociatedLoopLevelFromLoopConstruct( + std::int64_t GetNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &); - std::int64_t GetAssociatedLoopLevelFromClauses(const parser::OmpClauseList &); - void CollectAssociatedLoopLevelsFromLoopConstruct( + std::int64_t GetNumAffectedLoopsFromClauses(const parser::OmpClauseList &); + void CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); - void CollectAssociatedLoopLevelsFromInnerLoopContruct( + void CollectNumAffectedLoopsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); - void CollectAssociatedLoopLevelsFromClauses(const parser::OmpClauseList &, + void CollectNumAffectedLoopsFromClauses(const parser::OmpClauseList &, llvm::SmallVector &, llvm::SmallVector &); @@ -1885,7 +1889,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { beginDir.v == llvm::omp::Directive::OMPD_target_loop) IssueNonConformanceWarning(beginDir.v, 
beginDir.source, 52); ClearDataSharingAttributeObjects(); - SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromLoopConstruct(x)); + SetContextAssociatedLoopLevel(GetNumAffectedLoopsFromLoopConstruct(x)); if (beginDir.v == llvm::omp::Directive::OMPD_do) { auto &optLoopCons = std::get>(x.t); @@ -1899,7 +1903,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { } } PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x); - ordCollapseLevel = GetAssociatedLoopLevelFromLoopConstruct(x) + 1; + ordCollapseLevel = GetNumAffectedLoopsFromLoopConstruct(x) + 1; return true; } @@ -1995,13 +1999,12 @@ std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( llvm::SmallVector &levels, llvm::SmallVector &clauses) { - // Find the tile level to know how much to reduce the level for collapse + // Find the tile level to ensure that the COLLAPSE clause value + // does not exeed the number of tiled loops. std::int64_t tileLevel = 0; - for (auto [level, clause] : llvm::zip_equal(levels, clauses)) { - if (isSizesClause(clause)) { + for (auto [level, clause] : llvm::zip_equal(levels, clauses)) + if (isSizesClause(clause)) tileLevel = level; - } - } std::int64_t maxLevel = 1; const parser::OmpClause *maxClause = nullptr; @@ -2010,14 +2013,11 @@ std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( context_.Say(clause->source, "The value of the parameter in the COLLAPSE clause must" " not be larger than the number of the number of tiled loops" - " because collapse relies on independent loop iterations."_err_en_US); + " because collapse currently is limited to independent loop" + " iterations."_err_en_US); return 1; } - if (!isSizesClause(clause)) { - level = level - tileLevel; - } - if (level > maxLevel) { maxLevel = level; maxClause = clause; @@ -2028,36 +2028,36 @@ std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( return maxLevel; } -std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromLoopConstruct( +std::int64_t 
OmpAttributeVisitor::GetNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &x) { llvm::SmallVector levels; llvm::SmallVector clauses; - CollectAssociatedLoopLevelsFromLoopConstruct(x, levels, clauses); + CollectNumAffectedLoopsFromLoopConstruct(x, levels, clauses); return SetAssociatedMaxClause(levels, clauses); } -std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses( +std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromClauses( const parser::OmpClauseList &x) { llvm::SmallVector levels; llvm::SmallVector clauses; - CollectAssociatedLoopLevelsFromClauses(x, levels, clauses); + CollectNumAffectedLoopsFromClauses(x, levels, clauses); return SetAssociatedMaxClause(levels, clauses); } -void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromLoopConstruct( +void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { const auto &beginLoopDir{std::get(x.t)}; const auto &clauseList{std::get(beginLoopDir.t)}; - CollectAssociatedLoopLevelsFromClauses(clauseList, levels, clauses); - CollectAssociatedLoopLevelsFromInnerLoopContruct(x, levels, clauses); + CollectNumAffectedLoopsFromClauses(clauseList, levels, clauses); + CollectNumAffectedLoopsFromInnerLoopContruct(x, levels, clauses); } -void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct( +void OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { @@ -2071,12 +2071,12 @@ void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct( &(nestedOptional.value())); if (innerConstruct) { - CollectAssociatedLoopLevelsFromLoopConstruct( + CollectNumAffectedLoopsFromLoopConstruct( innerConstruct->value(), levels, clauses); } } -void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromClauses( +void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( 
const parser::OmpClauseList &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { for (const auto &clause : x.v) { diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index a994f23c1fbe2..1050e3d8b08dd 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2257,9 +2257,6 @@ class OpenMPIRBuilder { /// Return the function that contains the region to be outlined. Function *getFunction() const { return EntryBB->getParent(); } - - /// Dump the info in a somewhat readable way - void dump(); }; /// Collection of regions that need to be outlined during finalization. @@ -2280,9 +2277,6 @@ class OpenMPIRBuilder { /// Add a new region that will be outlined later. void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } - /// Dump outline infos - void dumpOutlineInfos(); - /// An ordered map of auto-generated variables to their unique names. /// It stores variables with the following names: 1) ".gomp_critical_user_" + /// + ".var" for "omp critical" directives; 2) @@ -3916,9 +3910,6 @@ class CanonicalLoopInfo { /// Invalidate this loop. That is, the underlying IR does not fulfill the /// requirements of an OpenMP canonical loop anymore. LLVM_ABI void invalidate(); - - /// Dump the info in a somewhat readable way - void dump(); }; /// ScanInfo holds the information to assist in lowering of Scan reduction. 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index ff50dfbbd5259..989bcf45e0006 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -9145,15 +9145,6 @@ Error OpenMPIRBuilder::emitOffloadingArrays( return Error::success(); } -void OpenMPIRBuilder::dumpOutlineInfos() { - errs() << "=== Outline Infos Begin ===\n"; - for (auto En : enumerate(OutlineInfos)) { - errs() << "[" << En.index() << "]: "; - En.value().dump(); - } - errs() << "=== Outline Infos End ===\n"; -} - void OpenMPIRBuilder::emitBranch(BasicBlock *Target) { BasicBlock *CurBB = Builder.GetInsertBlock(); @@ -10078,14 +10069,6 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks( } } -void OpenMPIRBuilder::OutlineInfo::dump() { - errs() << "=== OutilneInfo == " - << " EntryBB: " << (EntryBB ? EntryBB->getName() : "n\a") - << " ExitBB: " << (ExitBB ? ExitBB->getName() : "n\a") - << " OuterAllocaBB: " - << (OuterAllocaBB ? OuterAllocaBB->getName() : "n/a") << "\n"; -} - void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, @@ -10863,10 +10846,3 @@ void CanonicalLoopInfo::invalidate() { Latch = nullptr; Exit = nullptr; } - -void CanonicalLoopInfo::dump() { - errs() << "CanonicaLoop == Header: " << (Header ? Header->getName() : "n/a") - << " Cond: " << (Cond ? Cond->getName() : "n/a") - << " Latch: " << (Latch ? Latch->getName() : "n/a") - << " Exit: " << (Exit ? 
Exit->getName() : "n/a") << "\n"; -} diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index bac07e1ac17d5..5f9b6e29375d4 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -2961,7 +2961,7 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { for (auto &iv : ivs) iv.type = loopVarType; - auto ctx = parser.getBuilder().getContext(); + auto *ctx = parser.getBuilder().getContext(); // Parse "inclusive" flag. if (succeeded(parser.parseOptionalKeyword("inclusive"))) result.addAttribute("loop_inclusive", UnitAttr::get(ctx)); @@ -3065,8 +3065,7 @@ LogicalResult LoopNestOp::verify() { if (const auto &tiles = getTileSizes()) if (tiles.value().size() > numIVs) - return emitOpError() - << "number of tilings is larger than the number of loops"; + return emitOpError() << "too few canonical loops for tile dimensions"; if (!llvm::dyn_cast_if_present((*this)->getParentOp())) return emitOpError() << "expects parent op to be a loop wrapper"; diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 03d3cc57895c7..a8f4b3e585f22 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3061,9 +3061,8 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, // Update the loop infos loopInfos.clear(); - for (const auto &newLoop : NewLoops) { + for (const auto &newLoop : NewLoops) loopInfos.push_back(newLoop); - } } // Tiling done // Do collapse diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index c6b4ae02602d9..8072354d02ccd 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -172,7 +172,7 @@ func.func @collapse_size(%lb : index, %ub : index, %step 
: index) { func.func @tiles_length(%lb : index, %ub : index, %step : index) { omp.wsloop { - // expected-error@+1 {{number of tilings is larger than the number of loops}} + // expected-error@+1 {{op too few canonical loops for tile dimensions}} omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) tiles(2, 4) { omp.yield } From 432273a6d1419655d14bc5ed6d884354fca6cc63 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 21 Aug 2025 23:00:42 -0400 Subject: [PATCH 29/64] Undo unrelated change. --- flang/lib/Lower/OpenMP/Utils.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 2a6cb0d2d4a74..9d280985a27a4 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -758,6 +758,7 @@ bool collectLoopRelatedInfo( } while (collapseValue > 0); convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); + return found; } From 5dafa147f3e41584935b73b7538bed8953a2b4b4 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 26 Aug 2025 00:51:50 +0200 Subject: [PATCH 30/64] Proof-of-concept implementation of loop interchange --- flang/lib/Lower/OpenMP/OpenMP.cpp | 70 ++++++++++++++++--- flang/lib/Lower/OpenMP/Utils.cpp | 68 +++++++++++++++++- flang/lib/Lower/OpenMP/Utils.h | 7 ++ flang/lib/Parser/openmp-parsers.cpp | 1 + flang/lib/Parser/unparse.cpp | 3 + flang/lib/Semantics/CMakeLists.txt | 42 ++++++----- flang/lib/Semantics/canonicalize-omp.cpp | 12 ++-- flang/lib/Semantics/resolve-directives.cpp | 52 +++++++++++--- .../Frontend/OpenMP/ConstructDecompositionT.h | 20 ++++++ .../mlir/Dialect/OpenMP/OpenMPClauses.td | 20 ++++++ mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 3 +- .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 2 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 9 ++- 13 files changed, 261 insertions(+), 48 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 355ba9428995e..154361e28734b 100644 
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -440,6 +440,10 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { middleClauseList = &std::get(innerBegin.t); + } + if (innerDirective.v == llvm::omp::Directive::OMPD_interchange) { + llvm_unreachable("MK: Handle this"); + middleClauseList = &std::get(innerBegin.t); } } if (auto &endDirective = @@ -1191,7 +1195,10 @@ struct OpWithBodyGenInfo { /// \param [in] item - item in the queue to generate body for. static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, const ConstructQueue &queue, - ConstructQueue::const_iterator item) { + ConstructQueue::const_iterator item) { int a = 0; + if (a) { + op.dump(); + } fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); auto insertMarker = [](fir::FirOpBuilder &builder) { @@ -1330,7 +1337,10 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, // present). Otherwise, these operations will be inserted within a // wrapper region. 
mlir::Operation *privatizationBottomLevelOp = &op; - if (auto loopNest = llvm::dyn_cast(op)) { + if (auto loopNest = llvm::dyn_cast(op)) { int b = 0; + if (b) { + loopNest.dump(); + } llvm::SmallVector wrappers; loopNest.gatherWrappers(wrappers); if (!wrappers.empty()) @@ -1679,7 +1689,7 @@ genLoopNestClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const List &clauses, mlir::Location loc, mlir::omp::LoopNestOperands &clauseOps, - llvm::SmallVectorImpl &iv) { + llvm::SmallVectorImpl &iv, bool enableInterchange = false) { ClauseProcessor cp(converter, semaCtx, clauses); HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter); @@ -1703,7 +1713,9 @@ genLoopNestClauses(lower::AbstractConverter &converter, sizeValues.push_back(sizeValue); } clauseOps.tileSizes = sizeValues; - } + } else if (clause.id == llvm::omp::Clause::OMPC_permutation) { +llvm_unreachable("MK: To handle standalone interchange construct"); + } } llvm::SmallVector sizeValues; @@ -1711,6 +1723,13 @@ genLoopNestClauses(lower::AbstractConverter &converter, collectTileSizesFromOpenMPConstruct(ompCons, sizeValues, semaCtx); if (sizeValues.size() > 0) clauseOps.tileSizes = sizeValues; + + llvm::SmallVector permutationValues; collectPermutationFromOpenMPConstruct(ompCons, permutationValues, semaCtx); + if (enableInterchange) { + permutationValues.append({2,1}); + } + clauseOps.interchangeEnabled = mlir:: BoolAttr::get( firOpBuilder.getContext() , enableInterchange); + clauseOps.permutation = permutationValues; } static void genLoopClauses( @@ -2103,7 +2122,7 @@ static mlir::omp::LoopOp genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item) { + ConstructQueue::const_iterator item, llvm::omp::Directive dir = llvm::omp::Directive::OMPD_loop , bool enableInterchange = 
false) { mlir::omp::LoopOperands loopClauseOps; llvm::SmallVector loopReductionSyms; genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps, @@ -2117,7 +2136,7 @@ genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, - loopNestClauseOps, iv); + loopNestClauseOps, iv, enableInterchange); EntryBlockArgs loopArgs; loopArgs.priv.syms = dsp.getDelayedPrivSymbols(); @@ -2125,11 +2144,35 @@ genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, loopArgs.reduction.syms = loopReductionSyms; loopArgs.reduction.vars = loopClauseOps.reductionVars; + // Applying interchange clause + // tiling assumed to be applied after interchange + if (loopNestClauseOps.permutation.size() >=1) { + assert(loopNestClauseOps.permutation.size() == iv.size() && "TODO: if permutation is smaller than number of associated loops, permute only the first loops"); + llvm::SmallVector newIVs; + llvm::SmallVector newLBs; + llvm::SmallVector newUBs; + llvm::SmallVector newINCs; llvm::SmallVector newSizes; + + // TODO: Assert this is a valid permution + for (auto perm :loopNestClauseOps.permutation) { + newIVs.push_back(iv[perm-1]); + newLBs.push_back( loopNestClauseOps.loopLowerBounds[perm-1] ); + newUBs.push_back( loopNestClauseOps.loopUpperBounds[perm-1] ); + newINCs.push_back( loopNestClauseOps.loopSteps [perm-1] ); if (! 
loopNestClauseOps.tileSizes.empty()) newSizes.push_back( loopNestClauseOps.tileSizes[perm-1]); + } + + iv = newIVs; + loopNestClauseOps.loopLowerBounds = newLBs; + loopNestClauseOps.loopUpperBounds = newUBs; + loopNestClauseOps.loopSteps = newINCs; loopNestClauseOps.tileSizes = newSizes; + } + + //if (dir == llvm::omp::Directive::OMPD_loop) { auto loopOp = genWrapperOp(converter, loc, loopClauseOps, loopArgs); + // } genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, - loopNestClauseOps, iv, {{loopOp, loopArgs}}, - llvm::omp::Directive::OMPD_loop, dsp); + loopNestClauseOps, iv, {{loopOp, loopArgs}}, dir, dsp); return loopOp; } @@ -3506,6 +3549,11 @@ static void genOMPDispatch(lower::AbstractConverter &converter, case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; + case llvm::omp::Directive::OMPD_interchange: + newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item , llvm::omp::Directive::OMPD_interchange, /*Interchange=*/true); +//llvm_unreachable("MK: implement interchange"); +//genInterchangeOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); + break; // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc, @@ -3967,6 +4015,10 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, // Emit the omp.loop_nest with annotation for tiling genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value()); break; + case llvm::omp::Directive::OMPD_interchange: + genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value()); + // llvm_unreachable("MK: implement nested interchange"); + break; default: { unsigned version = semaCtx.langOptions().OpenMPVersion; TODO(currentLocation, @@ -3978,7 +4030,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, } } - 
llvm::omp::Directive directive = + llvm::omp::Directive directive = parser::omp::GetOmpDirectiveName(beginLoopDirective).v; const parser::CharBlock &source = std::get(beginLoopDirective.t).source; diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 9d280985a27a4..ccfecabcf8a50 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -669,6 +669,47 @@ void collectTileSizesFromOpenMPConstruct( } } + +/// Populates the sizes vector with values if the given OpenMPConstruct +/// Contains a loop construct with an inner tiling construct. +void collectPermutationFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &permutation, SemanticsContext &semaCtx) { + if (!ompCons) + return; + + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + const auto &nestedOptional = + std::get>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopDirective = innerConstruct->value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t).v; + + if (innerDirective == llvm::omp::Directive::OMPD_interchange) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{ + std::get(innerBegin.t)}; + for (const auto &clause : innerClauseList.v) + if (const auto tclause{ + std::get_if(&clause.u)}) { + for (auto &tval : tclause->v) { + if (const auto v{EvaluateInt64(semaCtx, tval)}) + permutation.push_back(*v); + } + } + } + } + } +} + bool collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, @@ -692,6 +733,7 @@ bool collectLoopRelatedInfo( // Collect sizes from tile directive if present std::int64_t sizesLengthValue = 0l; + std::int64_t 
permutationLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { if (auto *ompLoop{std::get_if(&ompCons->u)}) { const auto &nestedOptional = @@ -719,13 +761,33 @@ bool collectLoopRelatedInfo( found = true; } } + + if (innerDirective == llvm::omp::Directive::OMPD_interchange) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{std::get(innerBegin.t)}; + for (const auto &clause : innerClauseList.v){ + if (const auto tclause{ std::get_if(&clause.u)}) { + permutationLengthValue = tclause->v.size(); + found = true; + } + } + // default: permution(2,1) + if (permutationLengthValue == 0) + permutationLengthValue = 2; + } } } } - collapseValue = collapseValue - sizesLengthValue; - collapseValue = - collapseValue < sizesLengthValue ? sizesLengthValue : collapseValue; + + +collapseValue = collapseValue - sizesLengthValue; +if (sizesLengthValue > collapseValue) + collapseValue = sizesLengthValue; +if (permutationLengthValue > collapseValue) + collapseValue = permutationLengthValue; + + std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index bb42fb02efc09..5362c667d0575 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -180,6 +180,13 @@ void collectTileSizesFromOpenMPConstruct( llvm::SmallVectorImpl &tileSizes, Fortran::semantics::SemanticsContext &semaCtx); + +void collectPermutationFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &permuation, + Fortran::semantics::SemanticsContext &semaCtx); + + } // namespace omp } // namespace lower } // namespace Fortran diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 56cee4ab38e9b..4625225fd61a3 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1393,6 +1393,7 @@ TYPE_PARSER(sourced(construct(first( "TEAMS DISTRIBUTE" >> 
pure(llvm::omp::Directive::OMPD_teams_distribute), "TEAMS LOOP" >> pure(llvm::omp::Directive::OMPD_teams_loop), "TILE" >> pure(llvm::omp::Directive::OMPD_tile), + "INTERCHANGE" >> pure(llvm::omp::Directive::OMPD_interchange), "UNROLL" >> pure(llvm::omp::Directive::OMPD_unroll))))) TYPE_PARSER(sourced(construct( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 09dcfe60a46bc..fca2bc5af4511 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2510,6 +2510,9 @@ class UnparseVisitor { case llvm::omp::Directive::OMPD_tile: Word("TILE "); break; + case llvm::omp::Directive::OMPD_interchange: + Word("INTERCHANGE "); + break; case llvm::omp::Directive::OMPD_unroll: Word("UNROLL "); break; diff --git a/flang/lib/Semantics/CMakeLists.txt b/flang/lib/Semantics/CMakeLists.txt index 109bc2dbb8569..414b59812aa72 100644 --- a/flang/lib/Semantics/CMakeLists.txt +++ b/flang/lib/Semantics/CMakeLists.txt @@ -1,10 +1,4 @@ -add_flang_library(FortranSemantics - assignment.cpp - attr.cpp - canonicalize-acc.cpp - canonicalize-directives.cpp - canonicalize-do.cpp - canonicalize-omp.cpp +add_flang_library(FortranSemanticsChecks PARTIAL_SOURCES_INTENDED check-acc-structure.cpp check-allocate.cpp check-arithmeticif.cpp @@ -29,6 +23,30 @@ add_flang_library(FortranSemantics check-select-rank.cpp check-select-type.cpp check-stop.cpp + + DEPENDS + acc_gen + omp_gen + + LINK_LIBS + FortranSupport + FortranParser + FortranEvaluate + + LINK_COMPONENTS + Support + FrontendOpenMP + FrontendOpenACC + TargetParser +) + +add_flang_library(FortranSemantics PARTIAL_SOURCES_INTENDED + assignment.cpp + attr.cpp + canonicalize-acc.cpp + canonicalize-directives.cpp + canonicalize-do.cpp + canonicalize-omp.cpp compute-offsets.cpp data-to-inits.cpp definable.cpp @@ -61,6 +79,7 @@ add_flang_library(FortranSemantics FortranSupport FortranParser FortranEvaluate + FortranSemanticsChecks LINK_COMPONENTS Support @@ -68,12 +87,3 @@ 
add_flang_library(FortranSemantics FrontendOpenACC TargetParser ) - -target_precompile_headers(FortranSemantics PRIVATE - [["flang/Semantics/semantics.h"]] - [["flang/Semantics/type.h"]] - [["flang/Semantics/openmp-modifiers.h"]] - [["flang/Semantics/expression.h"]] - [["flang/Semantics/tools.h"]] - [["flang/Semantics/symbol.h"]] -) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 9722eca19447d..231c9eef9bfde 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -177,10 +177,9 @@ class CanonicalizationOfOmp { auto &nestedBeginLoopDirective = std::get(nestedBeginDirective.t); if ((nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll || - nestedBeginLoopDirective.v == - llvm::omp::Directive::OMPD_tile) && - !(nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll && - dir.v == llvm::omp::Directive::OMPD_tile)) { + nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_tile || + nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_interchange) && + !(nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll && dir.v == llvm::omp::Directive::OMPD_tile)) { // iterate through the remaining block items to find the end directive // for the unroll/tile directive. 
parser::Block::iterator endIt; @@ -205,9 +204,8 @@ class CanonicalizationOfOmp { std::optional{parser::NestedConstruct{ common::Indirection{std::move(*ompLoopCons)}}}; nextIt = block.erase(nextIt); - } else if (nestedBeginLoopDirective.v == - llvm::omp::Directive::OMPD_unroll && - dir.v == llvm::omp::Directive::OMPD_tile) { + } else if (nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll && + dir.v == llvm::omp::Directive::OMPD_tile) { // if a loop has been unrolled, the user can not then tile that loop // as it has been unrolled parser::OmpClauseList &unrollClauseList{ diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 94e1fbde24389..c6c4ce75d993f 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -828,14 +828,14 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { llvm::SmallVector &); std::int64_t GetNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &); - std::int64_t GetNumAffectedLoopsFromClauses(const parser::OmpClauseList &); + std::int64_t GetNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &); void CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); void CollectNumAffectedLoopsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); - void CollectNumAffectedLoopsFromClauses(const parser::OmpClauseList &, + void CollectNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &x, const parser::OmpClauseList &, llvm::SmallVector &, llvm::SmallVector &); @@ -1880,6 +1880,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { case llvm::omp::Directive::OMPD_teams_distribute_simd: case llvm::omp::Directive::OMPD_teams_loop: case llvm::omp::Directive::OMPD_tile: + case llvm::omp::Directive::OMPD_interchange: case 
llvm::omp::Directive::OMPD_unroll: PushContext(beginDir.source, beginDir.v); break; @@ -1996,7 +1997,7 @@ bool OmpAttributeVisitor::Pre(const parser::DoConstruct &x) { } static bool isSizesClause(const parser::OmpClause *clause) { - return std::holds_alternative(clause->u); + return clause && std::holds_alternative(clause->u); } std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( @@ -2041,15 +2042,21 @@ std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromLoopConstruct( return SetAssociatedMaxClause(levels, clauses); } -std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromClauses( + + + +std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &x) { llvm::SmallVector levels; llvm::SmallVector clauses; - CollectNumAffectedLoopsFromClauses(x, levels, clauses); + CollectNumAffectedLoopsFromClauses( y, x, levels, clauses); return SetAssociatedMaxClause(levels, clauses); } + + + void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, @@ -2057,7 +2064,8 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( const auto &beginLoopDir{std::get(x.t)}; const auto &clauseList{std::get(beginLoopDir.t)}; - CollectNumAffectedLoopsFromClauses(clauseList, levels, clauses); + + CollectNumAffectedLoopsFromClauses( x, clauseList, levels, clauses); CollectNumAffectedLoopsFromInnerLoopContruct(x, levels, clauses); } @@ -2080,9 +2088,19 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct( } } -void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( +void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { + const auto &beginLoopDir{std::get(y.t)}; + auto&& yt = std::get<0>(beginLoopDir.t); + + + + + const auto &beginDir{std::get(beginLoopDir.t)}; + 
const auto &dirClauses{std::get(beginLoopDir.t)}; + auto ytv = beginDir.v; + for (const auto &clause : x.v) { if (const auto oclause{ std::get_if(&clause.u)}) { @@ -2108,7 +2126,25 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( levels.push_back(tclause->v.size()); clauses.push_back(&clause); } + + } + + + if (ytv == llvm::omp::OMPD_interchange) { + for (const auto &clause : dirClauses.v) { + if (const auto tclause{std::get_if(&clause.u)}) { + levels.push_back(tclause->v.size()); + clauses.push_back(&clause); + llvm_unreachable("MK: fetch permute depth"); + return ; + } + } + + + levels.push_back(2); + clauses.push_back(nullptr); + } } // 2.15.1.1 Data-sharing Attribute Rules - Predetermined @@ -2566,7 +2602,7 @@ static bool IsTargetCaptureImplicitlyFirstprivatizeable(const Symbol &symbol, // It is default firstprivatizeable as far as the OpenMP specification is // concerned if it is a non-array scalar type that has been implicitly // captured in a target region - const auto *type{checkSym.GetType()}; + const auto *type{checkSym.GetType() }; if ((!checkSym.GetShape() || checkSym.GetShape()->empty()) && (type->category() == Fortran::semantics::DeclTypeSpec::Category::Numeric || diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 5bb1f3f36b65e..357f4c6e54502 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -211,6 +211,7 @@ struct ConstructDecompositionT { const ClauseTy *); bool applyClause(const tomp::clause::SizesT &clause, const ClauseTy *); + bool applyClause(const tomp::clause::PermutationT &clause,const ClauseTy *); bool applyClause(const tomp::clause::PrivateT &clause, const ClauseTy *); bool @@ -484,6 +485,7 @@ bool ConstructDecompositionT::applyClause( return false; } + // FIXME(JAN): Do the correct thing, but for now we'll do the same as collapse template bool 
ConstructDecompositionT::applyClause( @@ -503,6 +505,24 @@ bool ConstructDecompositionT::applyClause( return false; } +#if 1 +template +bool ConstructDecompositionT::applyClause(const tomp::clause::PermutationT &clause,const ClauseTy *node) { + // Apply "permutation" to the innermost directive. If it's not one that + // allows it flag an error. + if (!leafs.empty()) { + auto &last = leafs.back(); + + if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) { + last.clauses.push_back(node); + return true; + } + } + + return false; +} +#endif + // PRIVATE // [5.2:111:5-7] // Directives: distribute, do, for, loop, parallel, scope, sections, simd, diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index eb836db890738..4a3ae30c2a82f 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -349,6 +349,26 @@ class OpenMP_TileSizesClauseSkip< def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; + + +//===----------------------------------------------------------------------===// +// V6.0: [xx.x] `permutation` clause +//===----------------------------------------------------------------------===// + +class OpenMP_PermutationClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + BoolAttr:$interchangeEnabled, + OptionalAttr:$permutation + ); +} + +def OpenMP_PermutationClause : OpenMP_PermutationClauseSkip<>; + + //===----------------------------------------------------------------------===// // V5.2: [11.6.1] `dist_schedule` clause //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index e17315d923317..7a251130db113 100644 --- 
a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -616,7 +616,8 @@ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ ], clauses = [ OpenMP_LoopRelatedClause, OpenMP_CollapseClause, - OpenMP_TileSizesClause + OpenMP_TileSizesClause, + OpenMP_PermutationClause ], singleRegion = true> { let summary = "rectangular loop nest"; let description = [{ diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index f056e72531bfc..aefc86b783eaa 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -494,7 +494,7 @@ struct ParallelOpLowering : public OpRewritePattern { auto loopOp = omp::LoopNestOp::create( rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(), parallelOp.getUpperBound(), parallelOp.getStep(), false, - parallelOp.getLowerBound().size(), nullptr); + parallelOp.getLowerBound().size(), nullptr, false, nullptr); rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(), loopOp.getRegion().begin()); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 8768eed13cf32..65157a04b3ae7 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -91,7 +91,7 @@ void OpenMPDialect::initialize() { #include "mlir/Dialect/OpenMP/OpenMPOpsTypes.cpp.inc" >(); - declarePromisedInterface(); + declarePromisedInterface(); MemRefType::attachInterface(*getContext()); LLVM::LLVMPointerType::attachInterface( @@ -3037,10 +3037,13 @@ void LoopNestOp::print(OpAsmPrinter &p) { void LoopNestOp::build(OpBuilder &builder, OperationState &state, const LoopNestOperands &clauses) { MLIRContext *ctx = builder.getContext(); + + auto perm = makeDenseI64ArrayAttr(ctx, clauses.permutation); LoopNestOp::build(builder, state, clauses.loopLowerBounds, clauses.loopUpperBounds, clauses.loopSteps, - 
clauses.loopInclusive, clauses.numCollapse, - makeDenseI64ArrayAttr(ctx, clauses.tileSizes)); + clauses.loopInclusive, clauses.numCollapse, + makeDenseI64ArrayAttr(ctx, clauses.tileSizes), + clauses.interchangeEnabled , perm ); } LogicalResult LoopNestOp::verify() { From 2d0918319edb6fc146973c279df5c7cea0636132 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 26 Aug 2025 08:45:57 -0400 Subject: [PATCH 31/64] Remove stand-alone tiling. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 8 +++- flang/lib/Semantics/resolve-directives.cpp | 10 ----- flang/test/Lower/OpenMP/wsloop-tile.f90 | 39 ------------------- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 1 + 4 files changed, 7 insertions(+), 51 deletions(-) delete mode 100644 flang/test/Lower/OpenMP/wsloop-tile.f90 diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 355ba9428995e..7dc46258cea70 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3500,9 +3500,13 @@ static void genOMPDispatch(lower::AbstractConverter &converter, newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; - case llvm::omp::Directive::OMPD_tile: - newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); + case llvm::omp::Directive::OMPD_tile: { + unsigned version = semaCtx.langOptions().OpenMPVersion; + if (!semaCtx.langOptions().OpenMPSimd) + TODO(loc, "Unhandled loop directive (" + + llvm::omp::getOpenMPDirectiveName(dir, version) + ")"); break; + } case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 20df9d01de0c0..d53be2fea89f2 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -825,7 +825,6 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { llvm::SmallVector &); std::int64_t 
GetNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &); - std::int64_t GetNumAffectedLoopsFromClauses(const parser::OmpClauseList &); void CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); @@ -2037,15 +2036,6 @@ std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromLoopConstruct( return SetAssociatedMaxClause(levels, clauses); } -std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromClauses( - const parser::OmpClauseList &x) { - llvm::SmallVector levels; - llvm::SmallVector clauses; - - CollectNumAffectedLoopsFromClauses(x, levels, clauses); - return SetAssociatedMaxClause(levels, clauses); -} - void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90 deleted file mode 100644 index 4c412b357f52e..0000000000000 --- a/flang/test/Lower/OpenMP/wsloop-tile.f90 +++ /dev/null @@ -1,39 +0,0 @@ -! This test checks lowering of OpenMP DO Directive(Worksharing) with collapse. - -! 
RUN: bbc -fopenmp -fopenmp-version=51 -emit-hlfir %s -o - | FileCheck %s - -!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "WSLOOP_TILE"} { -program wsloop_tile - integer :: i, j, k - integer :: a, b, c - integer :: x - - a=30 - b=20 - c=50 - x=0 - - !CHECK: omp.loop_nest (%[[IV_0:.*]], %[[IV_1:.*]], %[[IV_2:.*]]) : i32 - !CHECK-SAME: tiles(2, 5, 10) - - !$omp do - !$omp tile sizes(2,5,10) - do i = 1, a - do j= 1, b - do k = 1, c - !CHECK: hlfir.assign %[[IV_0]] to %[[IV_0A:.*]] : i32 - !CHECK: hlfir.assign %[[IV_1]] to %[[IV_1A:.*]] : i32 - !CHECK: hlfir.assign %[[IV_2]] to %[[IV_2A:.*]] : i32 - !CHECK: %[[IVV_0:.*]] = fir.load %[[IV_0A]] - !CHECK: %[[SUM0:.*]] = arith.addi %{{.*}}, %[[IVV_0]] : i32 - !CHECK: %[[IVV_1:.*]] = fir.load %[[IV_1A]] - !CHECK: %[[SUM1:.*]] = arith.addi %[[SUM0]], %[[IVV_1]] : i32 - !CHECK: %[[IVV_2:.*]] = fir.load %[[IV_2A]] - !CHECK: %[[SUM2:.*]] = arith.addi %[[SUM1]], %[[IVV_2]] : i32 - x = x + i + j + k - end do - end do - end do - !$omp end tile - !$omp end do -end program wsloop_tile diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index a8f4b3e585f22..8e11f60fdc886 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2969,6 +2969,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); auto loopOp = cast(opInst); + // Set up the source location value for OpenMP runtime. llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); From 66818b328aaba4a89d856ce0d0f7d1edd36a4878 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 26 Aug 2025 10:27:14 -0400 Subject: [PATCH 32/64] Revert unused changes. 
--- .../Frontend/OpenMP/ConstructDecompositionT.h | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 5bb1f3f36b65e..047baa3a79f5d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -209,8 +209,6 @@ struct ConstructDecompositionT { bool applyClause(const tomp::clause::CollapseT &clause, const ClauseTy *); - bool applyClause(const tomp::clause::SizesT &clause, - const ClauseTy *); bool applyClause(const tomp::clause::PrivateT &clause, const ClauseTy *); bool @@ -484,24 +482,6 @@ bool ConstructDecompositionT::applyClause( return false; } -// FIXME(JAN): Do the correct thing, but for now we'll do the same as collapse -template -bool ConstructDecompositionT::applyClause( - const tomp::clause::SizesT &clause, - const ClauseTy *node) { - // Apply "sizes" to the innermost directive. If it's not one that - // allows it flag an error. - if (!leafs.empty()) { - auto &last = leafs.back(); - - if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) { - last.clauses.push_back(node); - return true; - } - } - - return false; -} // PRIVATE // [5.2:111:5-7] From f934fa6e2ad864e7b6be277ea2286810341c2095 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 26 Aug 2025 10:54:38 -0400 Subject: [PATCH 33/64] Don't do codegen for tiling if it is an inner construct. 
--- flang/lib/Lower/OpenMP/OpenMP.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 7dc46258cea70..e15e0773123c9 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3968,8 +3968,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: - // Emit the omp.loop_nest with annotation for tiling - genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value()); + // Skip OMPD_tile since the tile sizes will be retrieved when + // generating the omp.loop_nest op. break; default: { unsigned version = semaCtx.langOptions().OpenMPVersion; From 8f793a6549a739a04f7a675550e71cc9da44b5d7 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 26 Aug 2025 19:02:53 +0200 Subject: [PATCH 34/64] successful interchange --- flang/lib/Lower/OpenMP/Decomposer.cpp | 29 ++- flang/lib/Lower/OpenMP/Decomposer.h | 4 + flang/lib/Lower/OpenMP/OpenMP.cpp | 225 ++++++++++++------ flang/lib/Lower/OpenMP/Utils.cpp | 28 +-- flang/lib/Lower/OpenMP/Utils.h | 2 - flang/lib/Semantics/canonicalize-omp.cpp | 13 +- flang/lib/Semantics/resolve-directives.cpp | 73 +++--- .../Frontend/OpenMP/ConstructDecompositionT.h | 10 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 10 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 1 + 10 files changed, 247 insertions(+), 148 deletions(-) diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp index 9bfbf67bec88c..bf09bed395285 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.cpp +++ b/flang/lib/Lower/OpenMP/Decomposer.cpp @@ -98,14 +98,39 @@ ConstructQueue buildConstructQueue( return decompose.output; } +// from clang +static bool isOpenMPLoopTransformationDirective(llvm::omp::Directive DKind) { + return DKind == llvm::omp::Directive::OMPD_tile || 
+ DKind == llvm::omp::Directive::OMPD_unroll || + DKind == llvm::omp::Directive::OMPD_reverse || + DKind == llvm::omp::Directive::OMPD_interchange || + DKind == llvm::omp::Directive::OMPD_stripe; +} + +llvm::iterator_range getNonTransformQueue( + llvm::iterator_range range) { + // remove trailing loop transformations + auto b = range.begin(); + auto e = range.end(); + while (e != b) { + auto e2 = e - 1; + if (!isOpenMPLoopTransformationDirective(e2->id)) + break; + e = e2; + } + + return llvm::make_range(b, e); +} + bool matchLeafSequence(ConstructQueue::const_iterator item, const ConstructQueue &queue, llvm::omp::Directive directive) { llvm::ArrayRef leafDirs = llvm::omp::getLeafConstructsOrSelf(directive); - for (auto [dir, leaf] : - llvm::zip_longest(leafDirs, llvm::make_range(item, queue.end()))) { + for (auto [dir, leaf] : llvm::zip_longest( + leafDirs, + getNonTransformQueue(llvm::make_range(item, queue.end())))) { if (!dir.has_value() || !leaf.has_value()) return false; diff --git a/flang/lib/Lower/OpenMP/Decomposer.h b/flang/lib/Lower/OpenMP/Decomposer.h index 65492bd76280d..f057009629efc 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.h +++ b/flang/lib/Lower/OpenMP/Decomposer.h @@ -57,6 +57,10 @@ bool isLastItemInQueue(ConstructQueue::const_iterator item, bool matchLeafSequence(ConstructQueue::const_iterator item, const ConstructQueue &queue, llvm::omp::Directive directive); + +llvm::iterator_range getNonTransformQueue( + llvm::iterator_range range); + } // namespace Fortran::lower::omp #endif // FORTRAN_LOWER_OPENMP_DECOMPOSER_H diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 154361e28734b..3ce88a972639d 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -441,9 +441,11 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, middleClauseList = &std::get(innerBegin.t); } - if (innerDirective.v == llvm::omp::Directive::OMPD_interchange) { - llvm_unreachable("MK: 
Handle this"); - middleClauseList = &std::get(innerBegin.t); + if (innerDirective.v == + llvm::omp::Directive::OMPD_interchange) { + llvm_unreachable("MK: Handle this"); + middleClauseList = + &std::get(innerBegin.t); } } if (auto &endDirective = @@ -1195,10 +1197,11 @@ struct OpWithBodyGenInfo { /// \param [in] item - item in the queue to generate body for. static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, const ConstructQueue &queue, - ConstructQueue::const_iterator item) { int a = 0; + ConstructQueue::const_iterator item) { + int a = 0; if (a) { op.dump(); - } + } fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); auto insertMarker = [](fir::FirOpBuilder &builder) { @@ -1269,8 +1272,11 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, } if (!info.genSkeletonOnly) { + // Transforms already processed by getLoopNestOp + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); if (ConstructQueue::const_iterator next = std::next(item); - next != queue.end()) { + next != transforms.begin() && next != queue.end()) { genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval, info.loc, queue, next); } else { @@ -1337,10 +1343,11 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, // present). Otherwise, these operations will be inserted within a // wrapper region. 
mlir::Operation *privatizationBottomLevelOp = &op; - if (auto loopNest = llvm::dyn_cast(op)) { int b = 0; - if (b) { - loopNest.dump(); - } + if (auto loopNest = llvm::dyn_cast(op)) { + int b = 0; + if (b) { + loopNest.dump(); + } llvm::SmallVector wrappers; loopNest.gatherWrappers(wrappers); if (!wrappers.empty()) @@ -1689,7 +1696,8 @@ genLoopNestClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const List &clauses, mlir::Location loc, mlir::omp::LoopNestOperands &clauseOps, - llvm::SmallVectorImpl &iv, bool enableInterchange = false) { + llvm::SmallVectorImpl &iv, + bool enableInterchange = false) { ClauseProcessor cp(converter, semaCtx, clauses); HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter); @@ -1714,8 +1722,8 @@ genLoopNestClauses(lower::AbstractConverter &converter, } clauseOps.tileSizes = sizeValues; } else if (clause.id == llvm::omp::Clause::OMPC_permutation) { -llvm_unreachable("MK: To handle standalone interchange construct"); - } + llvm_unreachable("MK: To handle standalone interchange construct"); + } } llvm::SmallVector sizeValues; @@ -1724,11 +1732,13 @@ llvm_unreachable("MK: To handle standalone interchange construct"); if (sizeValues.size() > 0) clauseOps.tileSizes = sizeValues; - llvm::SmallVector permutationValues; collectPermutationFromOpenMPConstruct(ompCons, permutationValues, semaCtx); + llvm::SmallVector permutationValues; + collectPermutationFromOpenMPConstruct(ompCons, permutationValues, semaCtx); if (enableInterchange) { - permutationValues.append({2,1}); + permutationValues.append({2, 1}); } - clauseOps.interchangeEnabled = mlir:: BoolAttr::get( firOpBuilder.getContext() , enableInterchange); + clauseOps.interchangeEnabled = + mlir::BoolAttr::get(firOpBuilder.getContext(), enableInterchange); clauseOps.permutation = permutationValues; } @@ -2100,15 +2110,78 @@ static mlir::omp::LoopNestOp genLoopNestOp( llvm::ArrayRef< std::pair> wrapperArgs, - 
llvm::omp::Directive directive, DataSharingProcessor &dsp) { + llvm::omp::Directive directive, DataSharingProcessor &dsp, + std::optional> + transforms = std::nullopt) { auto ivCallback = [&](mlir::Operation *op) { genLoopVars(op, converter, loc, iv, wrapperArgs); return llvm::SmallVector(iv); }; - uint64_t nestValue = getCollapseValue(item->clauses); + uint64_t nestValue = getCollapseValue( + item->clauses); // MK: Should be number of affected loops? nestValue = nestValue < iv.size() ? iv.size() : nestValue; auto *nestedEval = getCollapsedLoopEval(eval, nestValue); + + if (!transforms.has_value()) { + // This must be a standalone construct, assume all following actions are + // transformations + transforms = llvm::make_range(std::next(item), queue.end()); + } + + for (auto &&transform : llvm::reverse(*transforms)) { + auto d = transform.id; + auto clauses = transform.clauses; + + switch (d) { + case llvm::omp::OMPD_interchange: { + bool hasPermutationClause = false; + llvm::SmallVector permutation; + + auto &&permutationClause = ClauseFinder::findUniqueClause< + Fortran::lower::omp::clause::Permutation>(clauses); + if (permutationClause) { + permutation.reserve(permutationClause->v.size()); + for (auto &&ts : permutationClause->v) { + permutation.push_back(evaluate::ToInt64(ts).value()); + } + // llvm::append_range( permutation, permutationClause->v); + + } else { + permutation = {2, 1}; + } + + assert(permutation.size() == iv.size() && + "TODO: if permutation is smaller than number of associated loops, " + "permute only the first loops"); + llvm::SmallVector newIVs; + llvm::SmallVector newLBs; + llvm::SmallVector newUBs; + llvm::SmallVector newINCs; + llvm::SmallVector newSizes; + + // TODO: Assert this is a valid permutation + for (auto perm : permutation) { + newIVs.push_back(iv[perm - 1]); + newLBs.push_back(clauseOps.loopLowerBounds[perm - 1]); + newUBs.push_back(clauseOps.loopUpperBounds[perm - 1]); + newINCs.push_back(clauseOps.loopSteps[perm - 1]); + if 
(!clauseOps.tileSizes.empty()) + newSizes.push_back(clauseOps.tileSizes[perm - 1]); + } + + iv = newIVs; + clauseOps.loopLowerBounds = newLBs; + clauseOps.loopUpperBounds = newUBs; + clauseOps.loopSteps = newINCs; + clauseOps.tileSizes = newSizes; + + } break; + default: + llvm_unreachable("MK: loop transformation not yet implemented"); + } + } + return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, directive) @@ -2122,7 +2195,7 @@ static mlir::omp::LoopOp genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, llvm::omp::Directive dir = llvm::omp::Directive::OMPD_loop , bool enableInterchange = false) { + ConstructQueue::const_iterator item) { mlir::omp::LoopOperands loopClauseOps; llvm::SmallVector loopReductionSyms; genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps, @@ -2136,7 +2209,7 @@ genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, - loopNestClauseOps, iv, enableInterchange); + loopNestClauseOps, iv); EntryBlockArgs loopArgs; loopArgs.priv.syms = dsp.getDelayedPrivSymbols(); @@ -2144,35 +2217,11 @@ genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, loopArgs.reduction.syms = loopReductionSyms; loopArgs.reduction.vars = loopClauseOps.reductionVars; - // Applying interchange clause - // tiling assumed to be applied after interchange - if (loopNestClauseOps.permutation.size() >=1) { - assert(loopNestClauseOps.permutation.size() == iv.size() && "TODO: if permutation is smaller than number of associated loops, permute only the first loops"); - llvm::SmallVector newIVs; - llvm::SmallVector newLBs; - llvm::SmallVector newUBs; - llvm::SmallVector newINCs; 
llvm::SmallVector newSizes; - - // TODO: Assert this is a valid permution - for (auto perm :loopNestClauseOps.permutation) { - newIVs.push_back(iv[perm-1]); - newLBs.push_back( loopNestClauseOps.loopLowerBounds[perm-1] ); - newUBs.push_back( loopNestClauseOps.loopUpperBounds[perm-1] ); - newINCs.push_back( loopNestClauseOps.loopSteps [perm-1] ); if (! loopNestClauseOps.tileSizes.empty()) newSizes.push_back( loopNestClauseOps.tileSizes[perm-1]); - } - - iv = newIVs; - loopNestClauseOps.loopLowerBounds = newLBs; - loopNestClauseOps.loopUpperBounds = newUBs; - loopNestClauseOps.loopSteps = newINCs; loopNestClauseOps.tileSizes = newSizes; - } - - //if (dir == llvm::omp::Directive::OMPD_loop) { auto loopOp = genWrapperOp(converter, loc, loopClauseOps, loopArgs); - // } genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, - loopNestClauseOps, iv, {{loopOp, loopArgs}}, dir, dsp); + loopNestClauseOps, iv, {{loopOp, loopArgs}}, + llvm::omp::Directive::OMPD_loop, dsp); return loopOp; } @@ -3089,7 +3138,10 @@ static mlir::omp::DistributeOp genCompositeDistributeParallelDo( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 3 && "Invalid leaf constructs"); ConstructQueue::const_iterator distributeItem = item; ConstructQueue::const_iterator parallelItem = std::next(distributeItem); ConstructQueue::const_iterator doItem = std::next(parallelItem); @@ -3143,10 +3195,10 @@ static mlir::omp::DistributeOp genCompositeDistributeParallelDo( converter, loc, wsloopClauseOps, wsloopArgs); wsloopOp.setComposite(/*val=*/true); - genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, doItem, - 
loopNestClauseOps, iv, - {{distributeOp, distributeArgs}, {wsloopOp, wsloopArgs}}, - llvm::omp::Directive::OMPD_distribute_parallel_do, dsp); + genLoopNestOp( + converter, symTable, semaCtx, eval, loc, queue, doItem, loopNestClauseOps, + iv, {{distributeOp, distributeArgs}, {wsloopOp, wsloopArgs}}, + llvm::omp::Directive::OMPD_distribute_parallel_do, dsp, transforms); return distributeOp; } @@ -3155,7 +3207,11 @@ static mlir::omp::DistributeOp genCompositeDistributeParallelDoSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 4 && "Invalid leaf constructs"); + ConstructQueue::const_iterator distributeItem = item; ConstructQueue::const_iterator parallelItem = std::next(distributeItem); ConstructQueue::const_iterator doItem = std::next(parallelItem); @@ -3237,7 +3293,7 @@ static mlir::omp::DistributeOp genCompositeDistributeParallelDoSimd( {wsloopOp, wsloopArgs}, {simdOp, simdArgs}}, llvm::omp::Directive::OMPD_distribute_parallel_do_simd, - simdItemDSP); + simdItemDSP, transforms); return distributeOp; } @@ -3246,7 +3302,11 @@ static mlir::omp::DistributeOp genCompositeDistributeSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 2 && "Invalid leaf constructs"); + ConstructQueue::const_iterator distributeItem 
= item; ConstructQueue::const_iterator simdItem = std::next(distributeItem); @@ -3298,7 +3358,8 @@ static mlir::omp::DistributeOp genCompositeDistributeSimd( genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem, loopNestClauseOps, iv, {{distributeOp, distributeArgs}, {simdOp, simdArgs}}, - llvm::omp::Directive::OMPD_distribute_simd, simdItemDSP); + llvm::omp::Directive::OMPD_distribute_simd, simdItemDSP, + transforms); return distributeOp; } @@ -3307,7 +3368,11 @@ static mlir::omp::WsloopOp genCompositeDoSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 2 && "Invalid leaf constructs"); + ConstructQueue::const_iterator doItem = item; ConstructQueue::const_iterator simdItem = std::next(doItem); @@ -3362,7 +3427,7 @@ static mlir::omp::WsloopOp genCompositeDoSimd( genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem, loopNestClauseOps, iv, {{wsloopOp, wsloopArgs}, {simdOp, simdArgs}}, - llvm::omp::Directive::OMPD_do_simd, simdItemDSP); + llvm::omp::Directive::OMPD_do_simd, simdItemDSP, transforms); return wsloopOp; } @@ -3371,7 +3436,11 @@ static mlir::omp::TaskloopOp genCompositeTaskloopSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 2 && "Invalid leaf constructs"); + 
if (!semaCtx.langOptions().OpenMPSimd) TODO(loc, "Composite TASKLOOP SIMD"); return nullptr; @@ -3550,10 +3619,8 @@ static void genOMPDispatch(lower::AbstractConverter &converter, genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_interchange: - newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item , llvm::omp::Directive::OMPD_interchange, /*Interchange=*/true); -//llvm_unreachable("MK: implement interchange"); -//genInterchangeOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); - break; + llvm_unreachable("MK: standalone interchange not implemented"); + break; // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc, @@ -4002,23 +4069,42 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, mlir::Location currentLocation = converter.genLocation(beginLoopDirective.source); + llvm::omp::Directive directive = + parser::omp::GetOmpDirectiveName(beginLoopDirective).v; + const parser::CharBlock &source = + std::get(beginLoopDirective.t).source; + ConstructQueue queue{ + buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, + eval, source, directive, clauses)}; + auto &optLoopCons = std::get>(loopConstruct.t); if (optLoopCons.has_value()) { if (auto *ompNestedLoopCons{ std::get_if>( &*optLoopCons)}) { + const Fortran::parser::OpenMPLoopConstruct &x = + ompNestedLoopCons->value(); + const Fortran::parser::OmpBeginLoopDirective &y = std::get<0>(x.t); + const Fortran::parser::OmpClauseList &clauseList = std::get<1>(y.t); llvm::omp::Directive nestedDirective = parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; + List nestedClauses = + makeClauses(std::get(y.t), semaCtx); + switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: // Emit the omp.loop_nest with annotation for tiling genOMP(converter, symTable, 
semaCtx, eval, ompNestedLoopCons->value()); break; - case llvm::omp::Directive::OMPD_interchange: - genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value()); - // llvm_unreachable("MK: implement nested interchange"); - break; + case llvm::omp::Directive::OMPD_interchange: { + ConstructQueue nestedQueue{buildConstructQueue( + converter.getFirOpBuilder().getModule(), semaCtx, eval, source, + nestedDirective, nestedClauses)}; + for (auto nl : nestedQueue) { + queue.push_back(nl); + } + } break; default: { unsigned version = semaCtx.langOptions().OpenMPVersion; TODO(currentLocation, @@ -4030,13 +4116,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, } } - llvm::omp::Directive directive = - parser::omp::GetOmpDirectiveName(beginLoopDirective).v; - const parser::CharBlock &source = - std::get(beginLoopDirective.t).source; - ConstructQueue queue{ - buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, - eval, source, directive, clauses)}; genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, queue.begin()); } diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index ccfecabcf8a50..e79bc585f0872 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -669,7 +669,6 @@ void collectTileSizesFromOpenMPConstruct( } } - /// Populates the sizes vector with values if the given OpenMPConstruct /// Contains a loop construct with an inner tiling construct. 
void collectPermutationFromOpenMPConstruct( @@ -733,7 +732,7 @@ bool collectLoopRelatedInfo( // Collect sizes from tile directive if present std::int64_t sizesLengthValue = 0l; - std::int64_t permutationLengthValue = 0l; + std::int64_t permutationLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { if (auto *ompLoop{std::get_if(&ompCons->u)}) { const auto &nestedOptional = @@ -764,29 +763,28 @@ bool collectLoopRelatedInfo( if (innerDirective == llvm::omp::Directive::OMPD_interchange) { // Get the size values from parse tree and convert to a vector - const auto &innerClauseList{std::get(innerBegin.t)}; - for (const auto &clause : innerClauseList.v){ - if (const auto tclause{ std::get_if(&clause.u)}) { + const auto &innerClauseList{ + std::get(innerBegin.t)}; + for (const auto &clause : innerClauseList.v) { + if (const auto tclause{ + std::get_if(&clause.u)}) { permutationLengthValue = tclause->v.size(); found = true; } - } + } // default: permution(2,1) if (permutationLengthValue == 0) - permutationLengthValue = 2; + permutationLengthValue = 2; } } } } - - -collapseValue = collapseValue - sizesLengthValue; -if (sizesLengthValue > collapseValue) - collapseValue = sizesLengthValue; -if (permutationLengthValue > collapseValue) - collapseValue = permutationLengthValue; - + collapseValue = collapseValue - sizesLengthValue; + if (sizesLengthValue > collapseValue) + collapseValue = sizesLengthValue; + if (permutationLengthValue > collapseValue) + collapseValue = permutationLengthValue; std::size_t loopVarTypeSize = 0; do { diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 5362c667d0575..4c097dcb659fd 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -180,13 +180,11 @@ void collectTileSizesFromOpenMPConstruct( llvm::SmallVectorImpl &tileSizes, Fortran::semantics::SemanticsContext &semaCtx); - void collectPermutationFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, llvm::SmallVectorImpl 
&permuation, Fortran::semantics::SemanticsContext &semaCtx); - } // namespace omp } // namespace lower } // namespace Fortran diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 231c9eef9bfde..df39770cac235 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -177,9 +177,11 @@ class CanonicalizationOfOmp { auto &nestedBeginLoopDirective = std::get(nestedBeginDirective.t); if ((nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll || - nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_tile || - nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_interchange) && - !(nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll && dir.v == llvm::omp::Directive::OMPD_tile)) { + nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_tile || + nestedBeginLoopDirective.v == + llvm::omp::Directive::OMPD_interchange) && + !(nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll && + dir.v == llvm::omp::Directive::OMPD_tile)) { // iterate through the remaining block items to find the end directive // for the unroll/tile directive. 
parser::Block::iterator endIt; @@ -204,8 +206,9 @@ class CanonicalizationOfOmp { std::optional{parser::NestedConstruct{ common::Indirection{std::move(*ompLoopCons)}}}; nextIt = block.erase(nextIt); - } else if (nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll && - dir.v == llvm::omp::Directive::OMPD_tile) { + } else if (nestedBeginLoopDirective.v == + llvm::omp::Directive::OMPD_unroll && + dir.v == llvm::omp::Directive::OMPD_tile) { // if a loop has been unrolled, the user can not then tile that loop // as it has been unrolled parser::OmpClauseList &unrollClauseList{ diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index c6c4ce75d993f..ea46aca3de117 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -828,15 +828,16 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { llvm::SmallVector &); std::int64_t GetNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &); - std::int64_t GetNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &); + std::int64_t GetNumAffectedLoopsFromClauses( + const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &); void CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); void CollectNumAffectedLoopsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); - void CollectNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &x, const parser::OmpClauseList &, - llvm::SmallVector &, + void CollectNumAffectedLoopsFromClauses(const parser::OpenMPLoopConstruct &x, + const parser::OmpClauseList &, llvm::SmallVector &, llvm::SmallVector &); Symbol::Flags dataSharingAttributeFlags{Symbol::Flag::OmpShared, @@ -1880,7 +1881,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { case 
llvm::omp::Directive::OMPD_teams_distribute_simd: case llvm::omp::Directive::OMPD_teams_loop: case llvm::omp::Directive::OMPD_tile: - case llvm::omp::Directive::OMPD_interchange: + case llvm::omp::Directive::OMPD_interchange: case llvm::omp::Directive::OMPD_unroll: PushContext(beginDir.source, beginDir.v); break; @@ -2042,21 +2043,15 @@ std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromLoopConstruct( return SetAssociatedMaxClause(levels, clauses); } - - - -std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, - const parser::OmpClauseList &x) { +std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromClauses( + const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &x) { llvm::SmallVector levels; llvm::SmallVector clauses; - CollectNumAffectedLoopsFromClauses( y, x, levels, clauses); + CollectNumAffectedLoopsFromClauses(y, x, levels, clauses); return SetAssociatedMaxClause(levels, clauses); } - - - void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, @@ -2064,8 +2059,7 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( const auto &beginLoopDir{std::get(x.t)}; const auto &clauseList{std::get(beginLoopDir.t)}; - - CollectNumAffectedLoopsFromClauses( x, clauseList, levels, clauses); + CollectNumAffectedLoopsFromClauses(x, clauseList, levels, clauses); CollectNumAffectedLoopsFromInnerLoopContruct(x, levels, clauses); } @@ -2088,18 +2082,16 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct( } } -void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, - const parser::OmpClauseList &x, llvm::SmallVector &levels, +void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( + const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &x, + llvm::SmallVector &levels, llvm::SmallVector &clauses) { - const auto 
&beginLoopDir{std::get(y.t)}; - auto&& yt = std::get<0>(beginLoopDir.t); - - - + const auto &beginLoopDir{std::get(y.t)}; + auto &&yt = std::get<0>(beginLoopDir.t); - const auto &beginDir{std::get(beginLoopDir.t)}; - const auto &dirClauses{std::get(beginLoopDir.t)}; - auto ytv = beginDir.v; + const auto &beginDir{std::get(beginLoopDir.t)}; + const auto &dirClauses{std::get(beginLoopDir.t)}; + auto ytv = beginDir.v; for (const auto &clause : x.v) { if (const auto oclause{ @@ -2126,25 +2118,22 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( const parser::O levels.push_back(tclause->v.size()); clauses.push_back(&clause); } - - } - - if (ytv == llvm::omp::OMPD_interchange) { - for (const auto &clause : dirClauses.v) { - if (const auto tclause{std::get_if(&clause.u)}) { - levels.push_back(tclause->v.size()); - clauses.push_back(&clause); - llvm_unreachable("MK: fetch permute depth"); - return ; - } - } - - - levels.push_back(2); - clauses.push_back(nullptr); + if (ytv == llvm::omp::OMPD_interchange) { + for (const auto &clause : dirClauses.v) { + if (const auto tclause{ + std::get_if(&clause.u)}) { + levels.push_back(tclause->v.size()); + clauses.push_back(&clause); + // llvm_unreachable("MK: fetch permute depth"); + return; + } } + + levels.push_back(2); + clauses.push_back(nullptr); + } } // 2.15.1.1 Data-sharing Attribute Rules - Predetermined @@ -2602,7 +2591,7 @@ static bool IsTargetCaptureImplicitlyFirstprivatizeable(const Symbol &symbol, // It is default firstprivatizeable as far as the OpenMP specification is // concerned if it is a non-array scalar type that has been implicitly // captured in a target region - const auto *type{checkSym.GetType() }; + const auto *type{checkSym.GetType()}; if ((!checkSym.GetShape() || checkSym.GetShape()->empty()) && (type->category() == Fortran::semantics::DeclTypeSpec::Category::Numeric || diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h 
b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 357f4c6e54502..a566cdaaebb0c 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -211,7 +211,9 @@ struct ConstructDecompositionT { const ClauseTy *); bool applyClause(const tomp::clause::SizesT &clause, const ClauseTy *); - bool applyClause(const tomp::clause::PermutationT &clause,const ClauseTy *); + bool + applyClause(const tomp::clause::PermutationT &clause, + const ClauseTy *); bool applyClause(const tomp::clause::PrivateT &clause, const ClauseTy *); bool @@ -505,9 +507,10 @@ bool ConstructDecompositionT::applyClause( return false; } -#if 1 template -bool ConstructDecompositionT::applyClause(const tomp::clause::PermutationT &clause,const ClauseTy *node) { +bool ConstructDecompositionT::applyClause( + const tomp::clause::PermutationT &clause, + const ClauseTy *node) { // Apply "permutation" to the innermost directive. If it's not one that // allows it flag an error. 
if (!leafs.empty()) { @@ -521,7 +524,6 @@ bool ConstructDecompositionT::applyClause(const tomp::clause::PermutationT return false; } -#endif // PRIVATE // [5.2:111:5-7] diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 65157a04b3ae7..e08747a8218cc 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -91,7 +91,7 @@ void OpenMPDialect::initialize() { #include "mlir/Dialect/OpenMP/OpenMPOpsTypes.cpp.inc" >(); - declarePromisedInterface(); + declarePromisedInterface(); MemRefType::attachInterface(*getContext()); LLVM::LLVMPointerType::attachInterface( @@ -3038,12 +3038,12 @@ void LoopNestOp::build(OpBuilder &builder, OperationState &state, const LoopNestOperands &clauses) { MLIRContext *ctx = builder.getContext(); - auto perm = makeDenseI64ArrayAttr(ctx, clauses.permutation); + auto perm = makeDenseI64ArrayAttr(ctx, clauses.permutation); LoopNestOp::build(builder, state, clauses.loopLowerBounds, clauses.loopUpperBounds, clauses.loopSteps, - clauses.loopInclusive, clauses.numCollapse, - makeDenseI64ArrayAttr(ctx, clauses.tileSizes), - clauses.interchangeEnabled , perm ); + clauses.loopInclusive, clauses.numCollapse, + makeDenseI64ArrayAttr(ctx, clauses.tileSizes), + clauses.interchangeEnabled, perm); } LogicalResult LoopNestOp::verify() { diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index a8f4b3e585f22..8e11f60fdc886 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2969,6 +2969,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); auto loopOp = cast(opInst); + // Set up the source location value for 
OpenMP runtime. llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); From b675870b4f3de7af06fe6f83c085e75668b77d8f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 26 Aug 2025 19:52:39 +0200 Subject: [PATCH 35/64] Remove in-development marker --- flang/lib/Lower/OpenMP/OpenMP.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 3ce88a972639d..0fb7003f36084 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -437,13 +437,9 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, innerLoopConstruct.t); const auto &innerDirective = std::get(innerBegin.t); - if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { - middleClauseList = - &std::get(innerBegin.t); - } - if (innerDirective.v == - llvm::omp::Directive::OMPD_interchange) { - llvm_unreachable("MK: Handle this"); + if (innerDirective.v == llvm::omp::Directive::OMPD_tile || + innerDirective.v == + llvm::omp::Directive::OMPD_interchange) { middleClauseList = &std::get(innerBegin.t); } From 2c6fcf57e8417f31771dbb44b24b0d1ff5fbc3f2 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 26 Aug 2025 20:57:44 +0200 Subject: [PATCH 36/64] Reduce change noise --- flang/lib/Lower/OpenMP/OpenMP.cpp | 9 --------- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 3 +-- mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 2 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 4 +--- 4 files changed, 3 insertions(+), 15 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 8ca96a6658b69..57d5d48be0204 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1734,15 +1734,6 @@ genLoopNestClauses(lower::AbstractConverter &converter, collectTileSizesFromOpenMPConstruct(ompCons, sizeValues, semaCtx); if (sizeValues.size() > 0) clauseOps.tileSizes = sizeValues; - - llvm::SmallVector 
permutationValues; - collectPermutationFromOpenMPConstruct(ompCons, permutationValues, semaCtx); - if (enableInterchange) { - permutationValues.append({2, 1}); - } - clauseOps.interchangeEnabled = - mlir::BoolAttr::get(firOpBuilder.getContext(), enableInterchange); - clauseOps.permutation = permutationValues; } static void genLoopClauses( diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 7a251130db113..e17315d923317 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -616,8 +616,7 @@ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ ], clauses = [ OpenMP_LoopRelatedClause, OpenMP_CollapseClause, - OpenMP_TileSizesClause, - OpenMP_PermutationClause + OpenMP_TileSizesClause ], singleRegion = true> { let summary = "rectangular loop nest"; let description = [{ diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index f3c7c8e0329e8..19fbefb48a378 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -494,7 +494,7 @@ struct ParallelOpLowering : public OpRewritePattern { auto loopOp = omp::LoopNestOp::create( rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(), parallelOp.getUpperBound(), parallelOp.getStep(), false, - parallelOp.getLowerBound().size(), nullptr, false, nullptr); + parallelOp.getLowerBound().size(), nullptr); rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(), loopOp.getRegion().begin()); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index e08747a8218cc..c59c8889cadcb 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -3038,12 +3038,10 @@ void LoopNestOp::build(OpBuilder &builder, OperationState &state, const LoopNestOperands &clauses) { MLIRContext *ctx = 
builder.getContext(); - auto perm = makeDenseI64ArrayAttr(ctx, clauses.permutation); LoopNestOp::build(builder, state, clauses.loopLowerBounds, clauses.loopUpperBounds, clauses.loopSteps, clauses.loopInclusive, clauses.numCollapse, - makeDenseI64ArrayAttr(ctx, clauses.tileSizes), - clauses.interchangeEnabled, perm); + makeDenseI64ArrayAttr(ctx, clauses.tileSizes)); } LogicalResult LoopNestOp::verify() { From 70dbb33e91453bce1f5039ac63c538f02b07ce98 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 28 Aug 2025 12:14:51 +0200 Subject: [PATCH 37/64] Allow tests written in Fortran --- llvm/runtimes/CMakeLists.txt | 5 ++++ openmp/CMakeLists.txt | 15 ++++++----- openmp/README.rst | 2 +- openmp/cmake/OpenMPTesting.cmake | 4 +++ openmp/runtime/test/CMakeLists.txt | 8 +++++- openmp/runtime/test/lit.cfg | 15 +++++++++++ openmp/runtime/test/lit.site.cfg.in | 2 ++ .../test/transform/unroll/heuristic_intdo.f90 | 26 +++++++++++++++++++ 8 files changed, 68 insertions(+), 9 deletions(-) create mode 100644 openmp/runtime/test/transform/unroll/heuristic_intdo.f90 diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 86cfd0285aa84..d33b5af5756f5 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -524,6 +524,11 @@ if(build_runtimes) endif() endforeach() endif() + + if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) + # Allow openmp to see the Fortran compiler + list(APPEND extra_args ENABLE_FORTRAN) + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) if (${LLVM_TOOL_FLANG_BUILD}) message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index f3de4bc4ee87b..1e446c8778934 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -78,13 +78,6 @@ else() set(OPENMP_TEST_CXX_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++.exe) endif() - # Check for flang - if (NOT 
MSVC) - set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang) - else() - set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang.exe) - endif() - # Set fortran test compiler if flang is found if (EXISTS "${OPENMP_TEST_Fortran_COMPILER}") message("Using local flang build at ${OPENMP_TEST_Fortran_COMPILER}") @@ -103,6 +96,14 @@ endif() include(config-ix) include(HandleOpenMPOptions) +# Check for flang +set(OPENMP_TEST_Fortran_COMPILER_default "flang") +if (CMAKE_Fortran_COMPILER) + set(OPENMP_TEST_Fortran_COMPILER_default "${CMAKE_Fortran_COMPILER}") +endif () +set(OPENMP_TEST_Fortran_COMPILER "${OPENMP_TEST_Fortran_COMPILER_default}" CACHE STRING + "Fortran compiler to use for testing OpenMP runtime libraries.") + # Set up testing infrastructure. include(OpenMPTesting) diff --git a/openmp/README.rst b/openmp/README.rst index c34d3e8a40d7d..cc485f9a56ce0 100644 --- a/openmp/README.rst +++ b/openmp/README.rst @@ -121,7 +121,7 @@ Options for all Libraries **OPENMP_TEST_Fortran_COMPILER** = ``${CMAKE_Fortran_COMPILER}`` Compiler to use for testing. Defaults to the compiler that was also used for - building. Will default to flang if build is in-tree. + building. **OPENMP_LLVM_TOOLS_DIR** = ``/path/to/built/llvm/tools`` Additional path to search for LLVM tools needed by tests. 
diff --git a/openmp/cmake/OpenMPTesting.cmake b/openmp/cmake/OpenMPTesting.cmake index 85240aede728d..60280b7ed4893 100644 --- a/openmp/cmake/OpenMPTesting.cmake +++ b/openmp/cmake/OpenMPTesting.cmake @@ -238,6 +238,10 @@ function(add_openmp_testsuite target comment) ) endif() endif() + + if (TARGET flang-rt) + add_dependencies(${target} flang-rt) + endif () endfunction() function(construct_check_openmp_target) diff --git a/openmp/runtime/test/CMakeLists.txt b/openmp/runtime/test/CMakeLists.txt index a7790804542b7..9ee3be6939811 100644 --- a/openmp/runtime/test/CMakeLists.txt +++ b/openmp/runtime/test/CMakeLists.txt @@ -41,7 +41,13 @@ add_library(ompt-print-callback INTERFACE) target_include_directories(ompt-print-callback INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/ompt) -add_openmp_testsuite(check-libomp "Running libomp tests" ${CMAKE_CURRENT_BINARY_DIR} DEPENDS omp) +add_custom_target(libomp-test-depends) +add_dependencies(libomp-test-depends omp) +if (LLVM_RUNTIMES_BUILD AND OPENMP_TEST_Fortran_COMPILER AND "flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + add_dependencies(libomp-test-depends flang-rt) +endif () + +add_openmp_testsuite(check-libomp "Running libomp tests" ${CMAKE_CURRENT_BINARY_DIR} DEPENDS libomp-test-depends) # Add target check-ompt, but make sure to not add the tests twice to check-openmp. add_openmp_testsuite(check-ompt "Running OMPT tests" ${CMAKE_CURRENT_BINARY_DIR}/ompt EXCLUDE_FROM_CHECK_ALL DEPENDS omp) diff --git a/openmp/runtime/test/lit.cfg b/openmp/runtime/test/lit.cfg index 4a5aff241765c..72da1ba1411f8 100644 --- a/openmp/runtime/test/lit.cfg +++ b/openmp/runtime/test/lit.cfg @@ -5,6 +5,8 @@ import os import re import subprocess import lit.formats +from lit.llvm.subst import ToolSubst +from lit.llvm import llvm_config # Tell pylint that we know config and lit_config exist somewhere. if 'PYLINT_IMPORT' in os.environ: @@ -39,6 +41,19 @@ config.name = 'libomp' # suffixes: A list of file extensions to treat as test files. 
config.suffixes = ['.c', '.cpp'] +if config.test_fortran_compiler: + lit_config.note("OpenMP Fortran tests enabled") + config.suffixes += ['.f90', '.F90'] + llvm_config.add_tool_substitutions([ + ToolSubst( + "%flang", + command=config.test_fortran_compiler, + unresolved="fatal", + ), + ], [config.llvm_tools_dir]) +else: + lit_config.note("OpenMP Fortran tests disabled") + # test_source_root: The root path where tests are located. config.test_source_root = os.path.dirname(__file__) diff --git a/openmp/runtime/test/lit.site.cfg.in b/openmp/runtime/test/lit.site.cfg.in index fc65289e4ce64..cc8b3b252d7d1 100644 --- a/openmp/runtime/test/lit.site.cfg.in +++ b/openmp/runtime/test/lit.site.cfg.in @@ -2,6 +2,7 @@ config.test_c_compiler = "@OPENMP_TEST_C_COMPILER@" config.test_cxx_compiler = "@OPENMP_TEST_CXX_COMPILER@" +config.test_fortran_compiler = "@OPENMP_TEST_Fortran_COMPILER@" config.test_compiler_features = @OPENMP_TEST_COMPILER_FEATURES@ config.test_compiler_has_omp_h = @OPENMP_TEST_COMPILER_HAS_OMP_H@ config.test_filecheck = "@OPENMP_FILECHECK_EXECUTABLE@" @@ -24,6 +25,7 @@ config.has_omit_frame_pointer_flag = @OPENMP_TEST_COMPILER_HAS_OMIT_FRAME_POINTE config.target_arch = "@LIBOMP_ARCH@" config.compiler_frontend_variant = "@CMAKE_C_COMPILER_FRONTEND_VARIANT@" config.compiler_simulate_id = "@CMAKE_C_SIMULATE_ID@" +config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" import lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/openmp/runtime/test/transform/unroll/heuristic_intdo.f90 b/openmp/runtime/test/transform/unroll/heuristic_intdo.f90 new file mode 100644 index 0000000000000..d0ef938dd3a8f --- /dev/null +++ b/openmp/runtime/test/transform/unroll/heuristic_intdo.f90 @@ -0,0 +1,26 @@ +! This test checks lowering of OpenMP unroll directive + +! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines + + +program unroll_heuristic + integer :: i + print *, 'do' + + !$OMP UNROLL + do i=7, 18, 3 + print '("i=", I0)', i + end do + !$OMP END UNROLL + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 +! CHECK-NEXT: i=10 +! CHECK-NEXT: i=13 +! CHECK-NEXT: i=16 +! CHECK-NEXT: done From f3ec693f124c8940da024a2ae1cd3ae9ed23b18e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 11:47:59 +0200 Subject: [PATCH 38/64] Revert "[flang][OpenMP] Enable tiling (#143715)" This reverts commit d452e67ee7b5d17aa040f71d8997abc1a47750e4. --- flang/include/flang/Lower/OpenMP.h | 1 + flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 18 +- flang/lib/Lower/OpenMP/ClauseProcessor.h | 5 +- flang/lib/Lower/OpenMP/OpenMP.cpp | 31 ++-- flang/lib/Lower/OpenMP/Utils.cpp | 92 +--------- flang/lib/Lower/OpenMP/Utils.h | 7 +- flang/lib/Semantics/resolve-directives.cpp | 163 ++++-------------- ...nested-loop-transformation-construct01.f90 | 20 +++ .../Lower/OpenMP/parallel-wsloop-lastpriv.f90 | 4 +- flang/test/Lower/OpenMP/simd.f90 | 2 +- flang/test/Lower/OpenMP/wsloop-collapse.f90 | 2 +- flang/test/Lower/OpenMP/wsloop-variable.f90 | 2 +- flang/test/Parser/OpenMP/do-tile-size.f90 | 29 ---- flang/test/Semantics/OpenMP/do-collapse.f90 | 1 - .../OpenMP/do-concurrent-collapse.f90 | 1 - .../Dialect/OpenMP/OpenMPClauseOperands.h | 2 +- .../mlir/Dialect/OpenMP/OpenMPClauses.td | 33 ---- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 15 +- .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 6 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 64 +------ .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 40 +---- .../Conversion/SCFToOpenMP/scf-to-openmp.mlir | 2 +- mlir/test/Dialect/OpenMP/invalid.mlir | 23 --- mlir/test/Dialect/OpenMP/ops.mlir | 54 ------ .../LLVMIR/omptarget-wsloop-collapsed.mlir | 2 +- mlir/test/Target/LLVMIR/openmp-llvm.mlir | 12 +- 26 files changed, 117 insertions(+), 514 deletions(-) create mode 100644 
flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 delete mode 100644 flang/test/Parser/OpenMP/do-tile-size.f90 diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h index df01a7b82c66c..581c93f76d627 100644 --- a/flang/include/flang/Lower/OpenMP.h +++ b/flang/include/flang/Lower/OpenMP.h @@ -80,6 +80,7 @@ void genOpenMPDeclarativeConstruct(AbstractConverter &, void genOpenMPSymbolProperties(AbstractConverter &converter, const pft::Variable &var); +int64_t getCollapseValue(const Fortran::parser::OmpClauseList &clauseList); void genThreadprivateOp(AbstractConverter &, const pft::Variable &); void genDeclareTargetIntGlobal(AbstractConverter &, const pft::Variable &); bool isOpenMPTargetConstruct(const parser::OpenMPConstruct &); diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index a96884f5680ba..23f0ca14e931d 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -273,15 +273,10 @@ bool ClauseProcessor::processCancelDirectiveName( bool ClauseProcessor::processCollapse( mlir::Location currentLocation, lower::pft::Evaluation &eval, - mlir::omp::LoopRelatedClauseOps &loopResult, - mlir::omp::CollapseClauseOps &collapseResult, + mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) const { - - int64_t numCollapse = collectLoopRelatedInfo(converter, currentLocation, eval, - clauses, loopResult, iv); - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - collapseResult.collapseNumLoops = firOpBuilder.getI64IntegerAttr(numCollapse); - return numCollapse > 1; + return collectLoopRelatedInfo(converter, currentLocation, eval, clauses, + result, iv); } bool ClauseProcessor::processDevice(lower::StatementContext &stmtCtx, @@ -527,13 +522,6 @@ bool ClauseProcessor::processProcBind( return false; } -bool ClauseProcessor::processTileSizes( - lower::pft::Evaluation &eval, mlir::omp::LoopNestOperands 
&result) const { - auto *ompCons{eval.getIf()}; - collectTileSizesFromOpenMPConstruct(ompCons, result.tileSizes, semaCtx); - return !result.tileSizes.empty(); -} - bool ClauseProcessor::processSafelen( mlir::omp::SafelenClauseOps &result) const { if (auto *clause = findUniqueClause()) { diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index 324ea3c1047a5..c46bdb348a3ef 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -63,8 +63,7 @@ class ClauseProcessor { mlir::omp::CancelDirectiveNameClauseOps &result) const; bool processCollapse(mlir::Location currentLocation, lower::pft::Evaluation &eval, - mlir::omp::LoopRelatedClauseOps &loopResult, - mlir::omp::CollapseClauseOps &collapseResult, + mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) const; bool processDevice(lower::StatementContext &stmtCtx, mlir::omp::DeviceClauseOps &result) const; @@ -99,8 +98,6 @@ class ClauseProcessor { bool processPriority(lower::StatementContext &stmtCtx, mlir::omp::PriorityClauseOps &result) const; bool processProcBind(mlir::omp::ProcBindClauseOps &result) const; - bool processTileSizes(lower::pft::Evaluation &eval, - mlir::omp::LoopNestOperands &result) const; bool processSafelen(mlir::omp::SafelenClauseOps &result) const; bool processSchedule(lower::StatementContext &stmtCtx, mlir::omp::ScheduleClauseOps &result) const; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 0ec33e6b24dbf..def6cfff88231 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -503,7 +503,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_distribute: case OMPD_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); break; case OMPD_teams: @@ -522,7 +522,7 @@ static void 
processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); cp.processNumTeams(stmtCtx, hostInfo->ops); break; @@ -533,7 +533,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, cp.processNumTeams(stmtCtx, hostInfo->ops); [[fallthrough]]; case OMPD_loop: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); break; case OMPD_teams_workdistribute: @@ -1569,10 +1569,9 @@ genLoopNestClauses(lower::AbstractConverter &converter, HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter); if (!hostEvalInfo || !hostEvalInfo->apply(clauseOps, iv)) - cp.processCollapse(loc, eval, clauseOps, clauseOps, iv); + cp.processCollapse(loc, eval, clauseOps, iv); clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); - cp.processTileSizes(eval, clauseOps); } static void genLoopClauses( @@ -1949,9 +1948,9 @@ static mlir::omp::LoopNestOp genLoopNestOp( return llvm::SmallVector(iv); }; - uint64_t nestValue = getCollapseValue(item->clauses); - nestValue = nestValue < iv.size() ? iv.size() : nestValue; - auto *nestedEval = getCollapsedLoopEval(eval, nestValue); + auto *nestedEval = + getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); + return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, directive) @@ -3844,8 +3843,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: - // Skip OMPD_tile since the tile sizes will be retrieved when - // generating the omp.loop_nest op. 
+ // Emit the omp.loop_nest with annotation for tiling + genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value()); break; default: { unsigned version = semaCtx.langOptions().OpenMPVersion; @@ -3958,6 +3957,18 @@ void Fortran::lower::genOpenMPSymbolProperties( lower::genDeclareTargetIntGlobal(converter, var); } +int64_t +Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) { + for (const parser::OmpClause &clause : clauseList.v) { + if (const auto &collapseClause = + std::get_if(&clause.u)) { + const auto *expr = semantics::GetExpr(collapseClause->v); + return evaluate::ToInt64(*expr).value(); + } + } + return 1; +} + void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter, const lower::pft::Variable &var) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index d1d1cd68a5b44..cb6dd57667824 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -13,7 +13,6 @@ #include "Utils.h" #include "ClauseFinder.h" -#include "flang/Evaluate/fold.h" #include "flang/Lower/OpenMP/Clauses.h" #include #include @@ -25,32 +24,11 @@ #include #include #include -#include #include #include #include -template -Fortran::semantics::MaybeIntExpr -EvaluateIntExpr(Fortran::semantics::SemanticsContext &context, const T &expr) { - if (Fortran::semantics::MaybeExpr maybeExpr{ - Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { - if (auto *intExpr{ - Fortran::evaluate::UnwrapExpr( - *maybeExpr)}) { - return std::move(*intExpr); - } - } - return std::nullopt; -} - -template -std::optional -EvaluateInt64(Fortran::semantics::SemanticsContext &context, const T &expr) { - return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); -} - llvm::cl::opt treatIndexAsSection( "openmp-treat-index-as-section", llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."), @@ -599,64 +577,12 @@ static 
void convertLoopBounds(lower::AbstractConverter &converter, } } -// Helper function that finds the sizes clause in a inner OMPD_tile directive -// and passes the sizes clause to the callback function if found. -static void processTileSizesFromOpenMPConstruct( - const parser::OpenMPConstruct *ompCons, - std::function processFun) { - if (!ompCons) - return; - if (auto *ompLoop{std::get_if(&ompCons->u)}) { - const auto &nestedOptional = - std::get>(ompLoop->t); - assert(nestedOptional.has_value() && - "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = - std::get_if>( - &(nestedOptional.value())); - if (innerConstruct) { - const auto &innerLoopDirective = innerConstruct->value(); - const auto &innerBegin = - std::get(innerLoopDirective.t); - const auto &innerDirective = - std::get(innerBegin.t).v; - - if (innerDirective == llvm::omp::Directive::OMPD_tile) { - // Get the size values from parse tree and convert to a vector. - const auto &innerClauseList{ - std::get(innerBegin.t)}; - for (const auto &clause : innerClauseList.v) { - if (const auto tclause{ - std::get_if(&clause.u)}) { - processFun(tclause); - break; - } - } - } - } - } -} - -/// Populates the sizes vector with values if the given OpenMPConstruct -/// contains a loop construct with an inner tiling construct. 
-void collectTileSizesFromOpenMPConstruct( - const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &tileSizes, - Fortran::semantics::SemanticsContext &semaCtx) { - processTileSizesFromOpenMPConstruct( - ompCons, [&](const parser::OmpClause::Sizes *tclause) { - for (auto &tval : tclause->v) - if (const auto v{EvaluateInt64(semaCtx, tval)}) - tileSizes.push_back(*v); - }); -} - -int64_t collectLoopRelatedInfo( +bool collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { - int64_t numCollapse = 1; + bool found = false; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // Collect the loops to collapse. @@ -669,19 +595,9 @@ int64_t collectLoopRelatedInfo( if (auto *clause = ClauseFinder::findUniqueClause(clauses)) { collapseValue = evaluate::ToInt64(clause->v).value(); - numCollapse = collapseValue; - } - - // Collect sizes from tile directive if present. 
- std::int64_t sizesLengthValue = 0l; - if (auto *ompCons{eval.getIf()}) { - processTileSizesFromOpenMPConstruct( - ompCons, [&](const parser::OmpClause::Sizes *tclause) { - sizesLengthValue = tclause->v.size(); - }); + found = true; } - collapseValue = std::max(collapseValue, sizesLengthValue); std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = @@ -715,7 +631,7 @@ int64_t collectLoopRelatedInfo( convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); - return numCollapse; + return found; } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 5f191d89ae205..88371ab8bf969 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -159,17 +159,12 @@ void genObjectList(const ObjectList &objects, void lastprivateModifierNotSupported(const omp::clause::Lastprivate &lastp, mlir::Location loc); -int64_t collectLoopRelatedInfo( +bool collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv); -void collectTileSizesFromOpenMPConstruct( - const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &tileSizes, - Fortran::semantics::SemanticsContext &semaCtx); - } // namespace omp } // namespace lower } // namespace Fortran diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 1b7718d1314d3..43f12c2b14038 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -856,23 +856,7 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { const parser::OmpClause *GetAssociatedClause() { return associatedClause; } private: - /// Given a vector of loop levels and a vector of corresponding clauses find - /// the largest loop level and set the associated loop level to the found - /// maximum. 
This is used for error handling to ensure that the number of - /// affected loops is not larger that the number of available loops. - std::int64_t SetAssociatedMaxClause(llvm::SmallVector &, - llvm::SmallVector &); - std::int64_t GetNumAffectedLoopsFromLoopConstruct( - const parser::OpenMPLoopConstruct &); - void CollectNumAffectedLoopsFromLoopConstruct( - const parser::OpenMPLoopConstruct &, llvm::SmallVector &, - llvm::SmallVector &); - void CollectNumAffectedLoopsFromInnerLoopContruct( - const parser::OpenMPLoopConstruct &, llvm::SmallVector &, - llvm::SmallVector &); - void CollectNumAffectedLoopsFromClauses(const parser::OmpClauseList &, - llvm::SmallVector &, - llvm::SmallVector &); + std::int64_t GetAssociatedLoopLevelFromClauses(const parser::OmpClauseList &); Symbol::Flags dataSharingAttributeFlags{Symbol::Flag::OmpShared, Symbol::Flag::OmpPrivate, Symbol::Flag::OmpFirstPrivate, @@ -1884,6 +1868,7 @@ bool OmpAttributeVisitor::Pre( bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { const auto &beginLoopDir{std::get(x.t)}; const auto &beginDir{std::get(beginLoopDir.t)}; + const auto &clauseList{std::get(beginLoopDir.t)}; switch (beginDir.v) { case llvm::omp::Directive::OMPD_distribute: case llvm::omp::Directive::OMPD_distribute_parallel_do: @@ -1934,7 +1919,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { beginDir.v == llvm::omp::Directive::OMPD_target_loop) IssueNonConformanceWarning(beginDir.v, beginDir.source, 52); ClearDataSharingAttributeObjects(); - SetContextAssociatedLoopLevel(GetNumAffectedLoopsFromLoopConstruct(x)); + SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromClauses(clauseList)); if (beginDir.v == llvm::omp::Directive::OMPD_do) { auto &optLoopCons = std::get>(x.t); @@ -1948,7 +1933,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { } } PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x); - ordCollapseLevel = GetNumAffectedLoopsFromLoopConstruct(x) + 1; + 
ordCollapseLevel = GetAssociatedLoopLevelFromClauses(clauseList) + 1; return true; } @@ -2036,111 +2021,44 @@ bool OmpAttributeVisitor::Pre(const parser::DoConstruct &x) { return true; } -static bool isSizesClause(const parser::OmpClause *clause) { - return std::holds_alternative(clause->u); -} - -std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( - llvm::SmallVector &levels, - llvm::SmallVector &clauses) { - - // Find the tile level to ensure that the COLLAPSE clause value - // does not exeed the number of tiled loops. - std::int64_t tileLevel = 0; - for (auto [level, clause] : llvm::zip_equal(levels, clauses)) - if (isSizesClause(clause)) - tileLevel = level; - - std::int64_t maxLevel = 1; - const parser::OmpClause *maxClause = nullptr; - for (auto [level, clause] : llvm::zip_equal(levels, clauses)) { - if (tileLevel > 0 && tileLevel < level) { - context_.Say(clause->source, - "The value of the parameter in the COLLAPSE clause must" - " not be larger than the number of the number of tiled loops" - " because collapse currently is limited to independent loop" - " iterations."_err_en_US); - return 1; - } - - if (level > maxLevel) { - maxLevel = level; - maxClause = clause; - } - } - if (maxClause) - SetAssociatedClause(maxClause); - return maxLevel; -} - -std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromLoopConstruct( - const parser::OpenMPLoopConstruct &x) { - llvm::SmallVector levels; - llvm::SmallVector clauses; - - CollectNumAffectedLoopsFromLoopConstruct(x, levels, clauses); - return SetAssociatedMaxClause(levels, clauses); -} - -void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( - const parser::OpenMPLoopConstruct &x, - llvm::SmallVector &levels, - llvm::SmallVector &clauses) { - const auto &beginLoopDir{std::get(x.t)}; - const auto &clauseList{std::get(beginLoopDir.t)}; - - CollectNumAffectedLoopsFromClauses(clauseList, levels, clauses); - CollectNumAffectedLoopsFromInnerLoopContruct(x, levels, clauses); -} - -void 
OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct( - const parser::OpenMPLoopConstruct &x, - llvm::SmallVector &levels, - llvm::SmallVector &clauses) { +std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses( + const parser::OmpClauseList &x) { + std::int64_t orderedLevel{0}; + std::int64_t collapseLevel{0}; - const auto &nestedOptional = - std::get>(x.t); - assert(nestedOptional.has_value() && - "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = - std::get_if>( - &(nestedOptional.value())); + const parser::OmpClause *ordClause{nullptr}; + const parser::OmpClause *collClause{nullptr}; - if (innerConstruct) { - CollectNumAffectedLoopsFromLoopConstruct( - innerConstruct->value(), levels, clauses); - } -} - -void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( - const parser::OmpClauseList &x, llvm::SmallVector &levels, - llvm::SmallVector &clauses) { for (const auto &clause : x.v) { - if (const auto oclause{ + if (const auto *orderedClause{ std::get_if(&clause.u)}) { - std::int64_t level = 0; - if (const auto v{EvaluateInt64(context_, oclause->v)}) { - level = *v; + if (const auto v{EvaluateInt64(context_, orderedClause->v)}) { + orderedLevel = *v; } - levels.push_back(level); - clauses.push_back(&clause); + ordClause = &clause; } - - if (const auto cclause{ + if (const auto *collapseClause{ std::get_if(&clause.u)}) { - std::int64_t level = 0; - if (const auto v{EvaluateInt64(context_, cclause->v)}) { - level = *v; + if (const auto v{EvaluateInt64(context_, collapseClause->v)}) { + collapseLevel = *v; } - levels.push_back(level); - clauses.push_back(&clause); + collClause = &clause; } + } - if (const auto tclause{std::get_if(&clause.u)}) { - levels.push_back(tclause->v.size()); - clauses.push_back(&clause); - } + if (orderedLevel && (!collapseLevel || orderedLevel >= collapseLevel)) { + SetAssociatedClause(ordClause); + return orderedLevel; + } else if (!orderedLevel && collapseLevel) { + 
SetAssociatedClause(collClause); + return collapseLevel; + } else { + SetAssociatedClause(nullptr); } + // orderedLevel < collapseLevel is an error handled in structural + // checks + + return 1; // default is outermost loop } // 2.15.1.1 Data-sharing Attribute Rules - Predetermined @@ -2172,21 +2090,10 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( const parser::OmpClause *clause{GetAssociatedClause()}; bool hasCollapseClause{ clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false}; - const parser::OpenMPLoopConstruct *innerMostLoop = &x; - const parser::NestedConstruct *innerMostNest = nullptr; - while (auto &optLoopCons{ - std::get>(innerMostLoop->t)}) { - innerMostNest = &(optLoopCons.value()); - if (const auto *innerLoop{ - std::get_if>( - innerMostNest)}) { - innerMostLoop = &(innerLoop->value()); - } else - break; - } - if (innerMostNest) { - if (const auto &outer{std::get_if(innerMostNest)}) { + auto &optLoopCons = std::get>(x.t); + if (optLoopCons.has_value()) { + if (const auto &outer{std::get_if(&*optLoopCons)}) { for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; --level) { if (loop->IsDoConcurrent()) { @@ -2222,7 +2129,7 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( CheckAssocLoopLevel(level, GetAssociatedClause()); } else if (const auto &loop{std::get_if< common::Indirection>( - innerMostNest)}) { + &*optLoopCons)}) { auto &beginDirective = std::get(loop->value().t); auto &beginLoopDirective = diff --git a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 b/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 new file mode 100644 index 0000000000000..17eba93a7405d --- /dev/null +++ b/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 @@ -0,0 +1,20 @@ +! Test to ensure TODO message is emitted for tile OpenMP 5.1 Directives when they are nested. 
+ +!RUN: not %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s + +subroutine loop_transformation_construct + implicit none + integer :: I = 10 + integer :: x + integer :: y(I) + + !$omp do + !$omp tile + do i = 1, I + y(i) = y(i) * 5 + end do + !$omp end tile + !$omp end do +end subroutine + +!CHECK: not yet implemented: Unhandled loop directive (tile) diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 index faf8f717f6308..2890e78e9d17f 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 @@ -108,7 +108,7 @@ subroutine omp_do_lastprivate_collapse2(a) ! CHECK-NEXT: %[[UB2:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref ! CHECK-NEXT: %[[STEP2:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]] : !fir.ref, !fir.ref, !fir.ref) { - ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) collapse(2) { + ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) { ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ea"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -174,7 +174,7 @@ subroutine omp_do_lastprivate_collapse3(a) ! CHECK-NEXT: %[[UB3:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref ! CHECK-NEXT: %[[STEP3:.*]] = arith.constant 1 : i32 ! 
CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[K_PVT_REF:.*]] : !fir.ref, !fir.ref, !fir.ref, !fir.ref) { - ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) collapse(3) { + ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) { ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ea"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90 index 369b5eb072af9..7655c786573e3 100644 --- a/flang/test/Lower/OpenMP/simd.f90 +++ b/flang/test/Lower/OpenMP/simd.f90 @@ -175,7 +175,7 @@ subroutine simd_with_collapse_clause(n) ! CHECK-NEXT: omp.loop_nest (%[[ARG_0:.*]], %[[ARG_1:.*]]) : i32 = ( ! CHECK-SAME: %[[LOWER_I]], %[[LOWER_J]]) to ( ! CHECK-SAME: %[[UPPER_I]], %[[UPPER_J]]) inclusive step ( - ! CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) collapse(2) { + ! 
CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) { !$OMP SIMD COLLAPSE(2) do i = 1, n do j = 1, n diff --git a/flang/test/Lower/OpenMP/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/wsloop-collapse.f90 index 677c7809c397f..7ec40ab4b2f43 100644 --- a/flang/test/Lower/OpenMP/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/wsloop-collapse.f90 @@ -57,7 +57,7 @@ program wsloop_collapse !CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref !CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_4:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_2:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_0:.*]] : !fir.ref, !fir.ref, !fir.ref) { -!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) collapse(3) { +!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) { !$omp do collapse(3) do i = 1, a do j= 1, b diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90 index 0f4aafb10ded3..f998c84331ce4 100644 --- a/flang/test/Lower/OpenMP/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/wsloop-variable.f90 @@ -22,7 +22,7 @@ program wsloop_variable !CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64 !CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64 !CHECK: omp.wsloop private({{.*}}) { -!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) collapse(2) { +!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16 !CHECK: 
hlfir.assign %[[ARG0_I16]] to %[[STORE_IV0:.*]]#0 : i16, !fir.ref !CHECK: hlfir.assign %[[ARG1]] to %[[STORE_IV1:.*]]#0 : i64, !fir.ref diff --git a/flang/test/Parser/OpenMP/do-tile-size.f90 b/flang/test/Parser/OpenMP/do-tile-size.f90 deleted file mode 100644 index 886ee4a2a680c..0000000000000 --- a/flang/test/Parser/OpenMP/do-tile-size.f90 +++ /dev/null @@ -1,29 +0,0 @@ -! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s -! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s - -subroutine openmp_do_tiles(x) - - integer, intent(inout)::x - - -!CHECK: !$omp do -!CHECK: !$omp tile sizes -!$omp do -!$omp tile sizes(2) -!CHECK: do - do x = 1, 100 - call F1() -!CHECK: end do - end do -!CHECK: !$omp end tile -!$omp end tile -!$omp end do - -!PARSE-TREE:| | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct -!PARSE-TREE:| | | OmpBeginLoopDirective -!PARSE-TREE:| | | OpenMPLoopConstruct -!PARSE-TREE:| | | | OmpBeginLoopDirective -!PARSE-TREE:| | | | | OmpLoopDirective -> llvm::omp::Directive = tile -!PARSE-TREE:| | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4' -!PARSE-TREE: | | | | DoConstruct -END subroutine openmp_do_tiles diff --git a/flang/test/Semantics/OpenMP/do-collapse.f90 b/flang/test/Semantics/OpenMP/do-collapse.f90 index ec6a3bdad3686..480bd45b79b83 100644 --- a/flang/test/Semantics/OpenMP/do-collapse.f90 +++ b/flang/test/Semantics/OpenMP/do-collapse.f90 @@ -31,7 +31,6 @@ program omp_doCollapse end do end do - !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct. 
!ERROR: At most one COLLAPSE clause can appear on the SIMD directive !$omp simd collapse(2) collapse(1) do i = 1, 4 diff --git a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 index 355626f6e73b9..bb1929249183b 100644 --- a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 +++ b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 @@ -1,7 +1,6 @@ !RUN: %python %S/../test_errors.py %s %flang -fopenmp integer :: i, j -! ERROR: DO CONCURRENT loops cannot be used with the COLLAPSE clause. !$omp parallel do collapse(2) do i = 1, 1 ! ERROR: DO CONCURRENT loops cannot form part of a loop nest. diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index 6a92b136ef51c..faf820dcfdb29 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -40,7 +40,7 @@ struct DeviceTypeClauseOps { /// Clauses that correspond to operations other than omp.target, but might have /// to be evaluated outside of a parent target region. using HostEvaluatedOperands = - detail::Clauses; // TODO: Add `indirect` clause. 
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index 5f40abe62a0f6..311c57fb4446c 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -209,23 +209,6 @@ class OpenMP_BindClauseSkip< def OpenMP_BindClause : OpenMP_BindClauseSkip<>; -//===----------------------------------------------------------------------===// -// V5.2: [4.4.3] `collapse` clause -//===----------------------------------------------------------------------===// - -class OpenMP_CollapseClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - ConfinedAttr, [IntMinValue<1>]> - :$collapse_num_loops - ); -} - -def OpenMP_CollapseClause : OpenMP_CollapseClauseSkip<>; - //===----------------------------------------------------------------------===// // V5.2: [5.7.2] `copyprivate` clause //===----------------------------------------------------------------------===// @@ -1402,22 +1385,6 @@ class OpenMP_ThreadLimitClauseSkip< def OpenMP_ThreadLimitClause : OpenMP_ThreadLimitClauseSkip<>; -//===----------------------------------------------------------------------===// -// V5.2: [9.1.1] `sizes` clause -//===----------------------------------------------------------------------===// - -class OpenMP_TileSizesClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - OptionalAttr:$tile_sizes - ); -} - -def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; - //===----------------------------------------------------------------------===// // V5.2: [12.1] `untied` clause //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 830b36f440098..2548a8ab4aac6 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -614,18 +614,13 @@ def WorkshareLoopWrapperOp : OpenMP_Op<"workshare.loop_wrapper", traits = [ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ RecursiveMemoryEffects, SameVariadicOperandSize ], clauses = [ - OpenMP_CollapseClause, - OpenMP_LoopRelatedClause, - OpenMP_TileSizesClause + OpenMP_LoopRelatedClause ], singleRegion = true> { let summary = "rectangular loop nest"; let description = [{ - This operation represents a rectangular loop nest which may be collapsed - and/or tiled. For each rectangular loop of the nest represented by an - instance of this operation, lower and upper bounds, as well as a step - variable, must be defined. The collapse clause specifies how many loops - that should be collapsed (1 if no collapse is done) after any tiling is - performed. The tiling sizes is represented by the tile sizes clause. + This operation represents a collapsed rectangular loop nest. For each + rectangular loop of the nest represented by an instance of this operation, + lower and upper bounds, as well as a step variable, must be defined. The lower and upper bounds specify a half-open range: the range includes the lower bound but does not include the upper bound. If the `loop_inclusive` @@ -638,7 +633,7 @@ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ `loop_steps` arguments. 
```mlir - omp.loop_nest (%i1, %i2) : i32 = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) collapse(2) tiles(5,5) { + omp.loop_nest (%i1, %i2) : i32 = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) { %a = load %arrA[%i1, %i2] : memref %b = load %arrB[%i1, %i2] : memref %sum = arith.addf %a, %b : f32 diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index 460595ba9f254..c4a9fc2e556f1 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -492,10 +492,8 @@ struct ParallelOpLowering : public OpRewritePattern { // Create loop nest and populate region with contents of scf.parallel. auto loopOp = omp::LoopNestOp::create( - rewriter, parallelOp.getLoc(), parallelOp.getLowerBound().size(), - parallelOp.getLowerBound(), parallelOp.getUpperBound(), - parallelOp.getStep(), /*loop_inclusive=*/false, - /*tile_sizes=*/nullptr); + rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(), + parallelOp.getUpperBound(), parallelOp.getStep()); rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(), loopOp.getRegion().begin()); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index aa88b9e8eef5a..6e43f28e8d93d 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -56,11 +56,6 @@ makeDenseBoolArrayAttr(MLIRContext *ctx, const ArrayRef boolArray) { return boolArray.empty() ? nullptr : DenseBoolArrayAttr::get(ctx, boolArray); } -static DenseI64ArrayAttr -makeDenseI64ArrayAttr(MLIRContext *ctx, const ArrayRef intArray) { - return intArray.empty() ? 
nullptr : DenseI64ArrayAttr::get(ctx, intArray); -} - namespace { struct MemRefPointerLikeModel : public PointerLikeType::ExternalModel steps; @@ -2972,35 +2967,6 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { parser.parseOperandList(steps, ivs.size(), OpAsmParser::Delimiter::Paren)) return failure(); - // Parse collapse - int64_t value = 0; - if (!parser.parseOptionalKeyword("collapse") && - (parser.parseLParen() || parser.parseInteger(value) || - parser.parseRParen())) - return failure(); - if (value > 1) - result.addAttribute( - "collapse_num_loops", - IntegerAttr::get(parser.getBuilder().getI64Type(), value)); - - // Parse tiles - SmallVector tiles; - auto parseTiles = [&]() -> ParseResult { - int64_t tile; - if (parser.parseInteger(tile)) - return failure(); - tiles.push_back(tile); - return success(); - }; - - if (!parser.parseOptionalKeyword("tiles") && - (parser.parseLParen() || parser.parseCommaSeparatedList(parseTiles) || - parser.parseRParen())) - return failure(); - - if (tiles.size() > 0) - result.addAttribute("tile_sizes", DenseI64ArrayAttr::get(ctx, tiles)); - // Parse the body. 
Region *region = result.addRegion(); if (parser.parseRegion(*region, ivs)) @@ -3024,23 +2990,14 @@ void LoopNestOp::print(OpAsmPrinter &p) { if (getLoopInclusive()) p << "inclusive "; p << "step (" << getLoopSteps() << ") "; - if (int64_t numCollapse = getCollapseNumLoops()) - if (numCollapse > 1) - p << "collapse(" << numCollapse << ") "; - - if (const auto tiles = getTileSizes()) - p << "tiles(" << tiles.value() << ") "; - p.printRegion(region, /*printEntryBlockArgs=*/false); } void LoopNestOp::build(OpBuilder &builder, OperationState &state, const LoopNestOperands &clauses) { - MLIRContext *ctx = builder.getContext(); - LoopNestOp::build(builder, state, clauses.collapseNumLoops, - clauses.loopLowerBounds, clauses.loopUpperBounds, - clauses.loopSteps, clauses.loopInclusive, - makeDenseI64ArrayAttr(ctx, clauses.tileSizes)); + LoopNestOp::build(builder, state, clauses.loopLowerBounds, + clauses.loopUpperBounds, clauses.loopSteps, + clauses.loopInclusive); } LogicalResult LoopNestOp::verify() { @@ -3056,17 +3013,6 @@ LogicalResult LoopNestOp::verify() { << "range argument type does not match corresponding IV type"; } - uint64_t numIVs = getIVs().size(); - - if (const auto &numCollapse = getCollapseNumLoops()) - if (numCollapse > numIVs) - return emitOpError() - << "collapse value is larger than the number of loops"; - - if (const auto &tiles = getTileSizes()) - if (tiles.value().size() > numIVs) - return emitOpError() << "too few canonical loops for tile dimensions"; - if (!llvm::dyn_cast_if_present((*this)->getParentOp())) return emitOpError() << "expects parent op to be a loop wrapper"; diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 2ab6bb0a73200..4e26e65cf9718 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3041,46 +3041,16 @@ 
convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, loopInfos.push_back(*loopResult); } + // Collapse loops. Store the insertion point because LoopInfos may get + // invalidated. llvm::OpenMPIRBuilder::InsertPointTy afterIP = loopInfos.front()->getAfterIP(); - // Do tiling. - if (const auto &tiles = loopOp.getTileSizes()) { - llvm::Type *ivType = loopInfos.front()->getIndVarType(); - SmallVector tileSizes; - - for (auto tile : tiles.value()) { - llvm::Value *tileVal = llvm::ConstantInt::get(ivType, tile); - tileSizes.push_back(tileVal); - } - - std::vector newLoops = - ompBuilder->tileLoops(ompLoc.DL, loopInfos, tileSizes); - - // Update afterIP to get the correct insertion point after - // tiling. - llvm::BasicBlock *afterBB = newLoops.front()->getAfter(); - llvm::BasicBlock *afterAfterBB = afterBB->getSingleSuccessor(); - afterIP = {afterAfterBB, afterAfterBB->begin()}; - - // Update the loop infos. - loopInfos.clear(); - for (const auto &newLoop : newLoops) - loopInfos.push_back(newLoop); - } // Tiling done. - - // Do collapse. - const auto &numCollapse = loopOp.getCollapseNumLoops(); - SmallVector collapseLoopInfos( - loopInfos.begin(), loopInfos.begin() + (numCollapse)); - - auto newTopLoopInfo = - ompBuilder->collapseLoops(ompLoc.DL, collapseLoopInfos, {}); - - assert(newTopLoopInfo && "New top loop information is missing"); + // Update the stack frame created for this loop to point to the resulting loop + // after applying transformations. 
moduleTranslation.stackWalk( [&](OpenMPLoopInfoStackFrame &frame) { - frame.loopInfo = newTopLoopInfo; + frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); return WalkResult::interrupt(); }); diff --git a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir index d362bb6092419..a722acbf2c347 100644 --- a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir +++ b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir @@ -6,7 +6,7 @@ func.func @parallel(%arg0: index, %arg1: index, %arg2: index, // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { // CHECK: omp.wsloop { - // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) collapse(2) { + // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { // CHECK: memref.alloca_scope scf.parallel (%i, %j) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { // CHECK: "test.payload"(%[[LVAR1]], %[[LVAR2]]) : (index, index) -> () diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 763f41c5420b8..986c3844d0bb9 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -157,29 +157,6 @@ func.func @no_loops(%lb : index, %ub : index, %step : index) { } } -// ----- - -func.func @collapse_size(%lb : index, %ub : index, %step : index) { - omp.wsloop { - // expected-error@+1 {{collapse value is larger than the number of loops}} - omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) collapse(4) { - omp.yield - } - } -} - -// ----- - -func.func @tiles_length(%lb : index, %ub : index, %step : index) { - omp.wsloop { - // expected-error@+1 {{op too few canonical loops for tile dimensions}} - omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) tiles(2, 4) { - omp.yield - } - } -} - - // ----- 
func.func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) { diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 60b1f61135ac2..3c2e0a3b7cc15 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -376,60 +376,6 @@ func.func @omp_loop_nest_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32, return } -// CHECK-LABEL: omp_loop_nest_pretty_multiple_collapse -func.func @omp_loop_nest_pretty_multiple_collapse(%lb1 : i32, %ub1 : i32, %step1 : i32, - %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref) -> () { - - omp.wsloop { - // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) collapse(2) - omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { - %1 = "test.payload"(%iv1) : (i32) -> (index) - %2 = "test.payload"(%iv2) : (i32) -> (index) - memref.store %iv1, %data1[%1] : memref - memref.store %iv2, %data1[%2] : memref - omp.yield - } - } - - return -} - -// CHECK-LABEL: omp_loop_nest_pretty_multiple_tiles -func.func @omp_loop_nest_pretty_multiple_tiles(%lb1 : i32, %ub1 : i32, %step1 : i32, - %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref) -> () { - - omp.wsloop { - // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) tiles(5, 10) - omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) tiles(5, 10) { - %1 = "test.payload"(%iv1) : (i32) -> (index) - %2 = "test.payload"(%iv2) : (i32) -> (index) - memref.store %iv1, %data1[%1] : memref - memref.store %iv2, %data1[%2] : memref - omp.yield - } - } - - return -} - -// CHECK-LABEL: omp_loop_nest_pretty_multiple_collapse_tiles -func.func @omp_loop_nest_pretty_multiple_collapse_tiles(%lb1 : i32, %ub1 : i32, %step1 : i32, - %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref) -> () { - - omp.wsloop { - // CHECK: omp.loop_nest 
(%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) collapse(2) tiles(5, 10) - omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) tiles(5, 10) { - %1 = "test.payload"(%iv1) : (i32) -> (index) - %2 = "test.payload"(%iv2) : (i32) -> (index) - memref.store %iv1, %data1[%1] : memref - memref.store %iv2, %data1[%2] : memref - omp.yield - } - } - - return -} - // CHECK-LABEL: omp_wsloop func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref, %linear_var : i32, %chunk_var : i32) -> () { diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir index d84641ff9c99b..b42e387acbb11 100644 --- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir @@ -9,7 +9,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : index) : i32 omp.wsloop { - omp.loop_nest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) collapse(2) { + omp.loop_nest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) { %1 = llvm.add %arg1, %arg2 : i32 %2 = llvm.mul %arg2, %loop_ub overflow : i32 %3 = llvm.add %arg1, %2 :i32 diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 27210bc0890ce..3f4dcd5e24c56 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -698,7 +698,7 @@ llvm.func @simd_simple(%lb : i64, %ub : i64, %step : i64, %arg0: !llvm.ptr) { // CHECK-LABEL: @simd_simple_multiple llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd { - 
omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) inclusive step (%step1, %step2) collapse(2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) inclusive step (%step1, %step2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 // The form of the emitted IR is controlled by OpenMPIRBuilder and // tested there. Just check that the right metadata is added and collapsed @@ -736,7 +736,7 @@ llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64 // CHECK-LABEL: @simd_simple_multiple_simdlen llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd simdlen(2) { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 // The form of the emitted IR is controlled by OpenMPIRBuilder and // tested there. Just check that the right metadata is added. 
@@ -760,7 +760,7 @@ llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l // CHECK-LABEL: @simd_simple_multiple_safelen llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd safelen(2) { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr, i64) -> !llvm.ptr, f32 %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr, i64) -> !llvm.ptr, f32 @@ -779,7 +779,7 @@ llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l // CHECK-LABEL: @simd_simple_multiple_simdlen_safelen llvm.func @simd_simple_multiple_simdlen_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd simdlen(1) safelen(2) { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr, i64) -> !llvm.ptr, f32 %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr, i64) -> !llvm.ptr, f32 @@ -1177,7 +1177,7 @@ llvm.func @collapse_wsloop( // CHECK: store i32 %[[TOTAL_SUB_1]], ptr // CHECK: call void @__kmpc_for_static_init_4u omp.wsloop { - omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) { + omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) { %31 = llvm.load %20 : !llvm.ptr -> i32 %32 = llvm.add %31, %arg0 : i32 %33 = llvm.add %32, %arg1 : i32 @@ -1239,7 +1239,7 @@ llvm.func @collapse_wsloop_dynamic( // CHECK: store i32 %[[TOTAL]], ptr 
// CHECK: call void @__kmpc_dispatch_init_4u omp.wsloop schedule(dynamic) { - omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) { + omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) { %31 = llvm.load %20 : !llvm.ptr -> i32 %32 = llvm.add %31, %arg0 : i32 %33 = llvm.add %32, %arg1 : i32 From 2510e0f30c0af970fd436af776836cef2b377958 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 12:18:46 +0200 Subject: [PATCH 39/64] Backport tblgen changes --- flang/lib/Lower/OpenMP/OpenMP.cpp | 3 +- flang/lib/Lower/OpenMP/Utils.cpp | 2 +- .../mlir/Dialect/OpenMP/OpenMPClauses.td | 104 +++++++++--------- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 13 ++- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 11 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 2 +- 6 files changed, 69 insertions(+), 66 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 4cf2d032170d7..7b4fb649ab383 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1619,7 +1619,8 @@ genLoopNestClauses(lower::AbstractConverter &converter, if (clause.id == llvm::omp::Clause::OMPC_collapse) { const auto &collapse = std::get(clause.u); int64_t collapseValue = evaluate::ToInt64(collapse.v).value(); - clauseOps.numCollapse = firOpBuilder.getI64IntegerAttr(collapseValue); + clauseOps.collapseNumLoops = + firOpBuilder.getI64IntegerAttr(collapseValue); } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { // This case handles the stand-alone tiling construct const auto &sizes = std::get(clause.u); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 91e40f6a0f5e6..00f1baa5094d1 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -699,7 +699,7 @@ bool collectLoopRelatedInfo( found = true; } - // Collect sizes from tile directive if present + // Collect sizes 
from tile directive if presentOpenMPToLLVMIRTranslation.cpp: std::int64_t sizesLengthValue = 0l; std::int64_t permutationLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index 4a3ae30c2a82f..9102a4320c578 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -209,6 +209,23 @@ class OpenMP_BindClauseSkip< def OpenMP_BindClause : OpenMP_BindClauseSkip<>; +//===----------------------------------------------------------------------===// +// V5.2: [4.4.3] `collapse` clause +//===----------------------------------------------------------------------===// + +class OpenMP_CollapseClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + ConfinedAttr, [IntMinValue<1>]> + :$collapse_num_loops + ); +} + +def OpenMP_CollapseClause : OpenMP_CollapseClauseSkip<>; + //===----------------------------------------------------------------------===// // V5.2: [5.7.2] `copyprivate` clause //===----------------------------------------------------------------------===// @@ -317,58 +334,6 @@ class OpenMP_DeviceClauseSkip< def OpenMP_DeviceClause : OpenMP_DeviceClauseSkip<>; -//===----------------------------------------------------------------------===// -// V5.2: [XX.X] `collapse` clause -//===----------------------------------------------------------------------===// - -class OpenMP_CollapseClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - DefaultValuedOptionalAttr:$num_collapse - ); -} - -def OpenMP_CollapseClause : OpenMP_CollapseClauseSkip<>; - -//===----------------------------------------------------------------------===// 
-// V5.2: [xx.x] `sizes` clause -//===----------------------------------------------------------------------===// - -class OpenMP_TileSizesClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - OptionalAttr:$tile_sizes - ); -} - -def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; - - - -//===----------------------------------------------------------------------===// -// V6.0: [xx.x] `permutation` clause -//===----------------------------------------------------------------------===// - -class OpenMP_PermutationClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - BoolAttr:$interchangeEnabled, - OptionalAttr:$permutation - ); -} - -def OpenMP_PermutationClause : OpenMP_PermutationClauseSkip<>; - - //===----------------------------------------------------------------------===// // V5.2: [11.6.1] `dist_schedule` clause //===----------------------------------------------------------------------===// @@ -1437,6 +1402,41 @@ class OpenMP_ThreadLimitClauseSkip< def OpenMP_ThreadLimitClause : OpenMP_ThreadLimitClauseSkip<>; +//===----------------------------------------------------------------------===// +// V5.2: [9.1.1] `sizes` clause +//===----------------------------------------------------------------------===// + +class OpenMP_TileSizesClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + OptionalAttr:$tile_sizes + ); +} + +def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; + + +//===----------------------------------------------------------------------===// +// V6.0: [xx.x] `permutation` clause 
+//===----------------------------------------------------------------------===// + +class OpenMP_PermutationClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + BoolAttr:$interchangeEnabled, + OptionalAttr:$permutation + ); +} + +def OpenMP_PermutationClause : OpenMP_PermutationClauseSkip<>; + + //===----------------------------------------------------------------------===// // V5.2: [12.1] `untied` clause //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index e17315d923317..830b36f440098 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -614,15 +614,18 @@ def WorkshareLoopWrapperOp : OpenMP_Op<"workshare.loop_wrapper", traits = [ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ RecursiveMemoryEffects, SameVariadicOperandSize ], clauses = [ - OpenMP_LoopRelatedClause, OpenMP_CollapseClause, + OpenMP_LoopRelatedClause, OpenMP_TileSizesClause ], singleRegion = true> { let summary = "rectangular loop nest"; let description = [{ - This operation represents a collapsed rectangular loop nest. For each - rectangular loop of the nest represented by an instance of this operation, - lower and upper bounds, as well as a step variable, must be defined. + This operation represents a rectangular loop nest which may be collapsed + and/or tiled. For each rectangular loop of the nest represented by an + instance of this operation, lower and upper bounds, as well as a step + variable, must be defined. The collapse clause specifies how many loops + should be collapsed (1 if no collapse is done) after any tiling is + performed. The tile sizes are represented by the tile sizes clause.
The lower and upper bounds specify a half-open range: the range includes the lower bound but does not include the upper bound. If the `loop_inclusive` @@ -635,7 +638,7 @@ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ `loop_steps` arguments. ```mlir - omp.loop_nest (%i1, %i2) : i32 = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) { + omp.loop_nest (%i1, %i2) : i32 = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) collapse(2) tiles(5,5) { %a = load %arrA[%i1, %i2] : memref %b = load %arrB[%i1, %i2] : memref %sum = arith.addf %a, %b : f32 diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index c59c8889cadcb..cc57cf66dc158 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -3024,7 +3024,7 @@ void LoopNestOp::print(OpAsmPrinter &p) { if (getLoopInclusive()) p << "inclusive "; p << "step (" << getLoopSteps() << ") "; - if (int64_t numCollapse = getNumCollapse()) + if (int64_t numCollapse = getCollapseNumLoops()) if (numCollapse > 1) p << "collapse(" << numCollapse << ") "; @@ -3037,10 +3037,9 @@ void LoopNestOp::print(OpAsmPrinter &p) { void LoopNestOp::build(OpBuilder &builder, OperationState &state, const LoopNestOperands &clauses) { MLIRContext *ctx = builder.getContext(); - - LoopNestOp::build(builder, state, clauses.loopLowerBounds, - clauses.loopUpperBounds, clauses.loopSteps, - clauses.loopInclusive, clauses.numCollapse, + LoopNestOp::build(builder, state, clauses.collapseNumLoops, + clauses.loopLowerBounds, clauses.loopUpperBounds, + clauses.loopSteps, clauses.loopInclusive, makeDenseI64ArrayAttr(ctx, clauses.tileSizes)); } @@ -3059,7 +3058,7 @@ LogicalResult LoopNestOp::verify() { uint64_t numIVs = getIVs().size(); - if (const auto &numCollapse = getNumCollapse()) + if (const auto &numCollapse = getCollapseNumLoops()) if (numCollapse > numIVs) return emitOpError() << "collapse value is larger than the number of loops"; diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index c2017398bf264..6600747ad85e8 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3073,7 +3073,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, } // Tiling done // Do collapse - if (const auto &numCollapse = loopOp.getNumCollapse()) { + if (const auto &numCollapse = loopOp.getCollapseNumLoops()) { SmallVector collapseLoopInfos( loopInfos.begin(), loopInfos.begin() + (numCollapse)); From de3c360886e1c5882b955570060d3749f4dd1458 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 15:48:51 +0200 Subject: [PATCH 40/64] add Fortran testing --- flang/lib/Lower/OpenMP/Utils.cpp | 2 +- .../Dialect/OpenMP/OpenMPClauseOperands.h | 2 +- .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 7 ++- .../parallel-wsloop-collapse-intdo.f90 | 33 ++++++++++ .../parallel-wsloop-collapse-intdo.o | Bin 0 -> 3179 bytes openmp/runtime/test/transform/tile/intdo.f90 | 58 +++++++++++++++++ .../tile/parallel-wsloop-collapse-intdo.f90 | 59 ++++++++++++++++++ 7 files changed, 156 insertions(+), 5 deletions(-) create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.o create mode 100644 openmp/runtime/test/transform/tile/intdo.f90 create mode 100644 openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intdo.f90 diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 00f1baa5094d1..91e40f6a0f5e6 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -699,7 +699,7 @@ bool collectLoopRelatedInfo( found = true; } - // Collect sizes from tile directive if presentOpenMPToLLVMIRTranslation.cpp: + // Collect sizes from tile 
directive if present std::int64_t sizesLengthValue = 0l; std::int64_t permutationLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index faf820dcfdb29..6a92b136ef51c 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -40,7 +40,7 @@ struct DeviceTypeClauseOps { /// Clauses that correspond to operations other than omp.target, but might have /// to be evaluated outside of a parent target region. using HostEvaluatedOperands = - detail::Clauses; // TODO: Add `indirect` clause. diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index 19fbefb48a378..460595ba9f254 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -492,9 +492,10 @@ struct ParallelOpLowering : public OpRewritePattern { // Create loop nest and populate region with contents of scf.parallel. auto loopOp = omp::LoopNestOp::create( - rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(), - parallelOp.getUpperBound(), parallelOp.getStep(), false, - parallelOp.getLowerBound().size(), nullptr); + rewriter, parallelOp.getLoc(), parallelOp.getLowerBound().size(), + parallelOp.getLowerBound(), parallelOp.getUpperBound(), + parallelOp.getStep(), /*loop_inclusive=*/false, + /*tile_sizes=*/nullptr); rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(), loopOp.getRegion().begin()); diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.f90 new file mode 100644 index 0000000000000..498534374ea30 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.f90 @@ -0,0 +1,33 @@ + +! 
RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_collapse_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO COLLAPSE(2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.o b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.o new file mode 100644 index 0000000000000000000000000000000000000000..a0abcfdf74fdaf7e6ab474ce4421da0c0e191065 GIT binary patch literal 3179 zcmZ`*UyKt)7@uVecbtb-MMC^XJ&PX7mHxZtj*Fp%+h#Ae@eZ%Vhn~x}-5$$rcbnZU zpbrY3lB`V=jW23^QeTYvs3Os%CnQGFh?aXg} z^ZRDL@0;D3R!-hWu;_h+j8H^K#8!^kF2?5MIzrBKgzOJ6G7L?KOdpI?WLsvGU!Pv@ zLkiL)+K!YgtDEiZ9Y@`_6s9N6c;=`g(Egy20rl9fU!$8@ks{2ZKGjM?fO> z=z8bFbj=m`d3`qCl6HIjDbJGanfUl1RNfUd+p*uwQX_ zu%HR#Ib9GJZ>GhT7EXVUl-R;RNahsERuGELlIY}b`2*+(8))WlG0@tCvs3K+B(h%S z6l}gLWK6C_(;d4=h25(?>}%<2EBbJ&!*?jJh1I;7$T%_Ks<5tLjYpy ztMGAXo17yAZjjEuWg!RyWLX%*b(V#Gbj|hC_1Y=i^c^39(OfIPIoFyC=30e8u_g4! 
z{?0kYa46@@hKEIGG8|o=43FV7(59hbPB!yFNL5o9+;MKP4Sw8?wH@J3Q@G;@D^2*% z-yT0_rkeg*EV8xfi+a;}FswII!CzH6s|3_`NejhBV4{g zn!**Ycd02{YUVFH`MZvQv7(Qsac^VojML7z>8Cq_@q{3bVH7c!=q_An7W~`;zRQN1 z`3n@FbKnq-^Un3HtE;QxiLLyaBPg0bHdNV~YLcNWmS)Y>tg(z3`xf-CSD2LxZZgPMe zhnlV11jN?;2oSy}0rEN2Y~8N`v32hPV(W6S{cPP$fEYIg=xJ*En$NufC``F`eC}O9 z%=QDH`w-A}s#^hsZ)||90%HC>2;bc^lnViRmZF_L8U@7ss{`6WM;m}1qv&-&&r!q; z@WKGU-sGkU0lXb>9Pz&0#e$`nRb7E*56SwxQmo6StZ9lia@5icqdrnLG)=Bs$_R)mMx;6( zB^6_MNPS^wXJ{(AGc*)B%;1iSp(_&&eL**l>V6xYh)zsE1W+IrTDG{Y*X(scCqE1v zZVX4#J__xc6tbB@ZZZza=?lp3mLtGX4@)KZrM`sJG4MO8f`HxCiZmTk3+ zrmzY*g%y;8;3?jMgRDzy9`=+Kp7aCQ01xjgc)II2&QrJ9Xy?reuLI#ZoE6`wA9DBq zh78=k9fv&(f`-y=g$!sqZd4xn-5O*-({Y=kr#y!zJC>h6+qIGT-g%Sa`N4DeJz?R_ z5B$E~-=OzhA5gQ7bXw84IOY{0hcA9|d4q42b*)g-ZG|@y&(Z4%(Cn70U-tP{Gp-eC zx~(wFJm+QoyFF^G-?w_pwL(p|)kbP1QXcMqv2_3$%=5k57r#rM!}}M@@}JdFWI)q# z^WyI$mP3r^n~xJoWC%%D71@qN@uVlKtDqs$%uG#IbtK?*4Y@wzH|-UcuU)Rs$TH?l zPvA3u^)TP|Fn<8kh6mSTd*9bHvzIkAW@~Nj(fopgZef~?*M*ib^rwKY@xnX^gZCm; zd#vyr_6qDyRn-)?`wwJnp36?{&l{D7&l2)0cqd5ka#VC16>TZCk0V)lHkmyeZdSwq>)+TF@&~2C%kL zQ}m7zR}aG-gUsW4!#X7CjhbX1G8MUEd6AT4(^M7HdqE-*qgI#Tc=g8THKQbJt`$z; z4L}MOq%xes9fX&YDkP;dS{7HvDyu-x%#^iPQpr@-h4Ha?JT;z}jK|~q;?d-IB8n~a zqNz+5JKkZN5>lgXcRW=}R8om(Dxp9XE5|GHSTZ>_D#t6;ST&mLbz}-|l6l2U#L2%1 Co=emK literal 0 HcmV?d00001 diff --git a/openmp/runtime/test/transform/tile/intdo.f90 b/openmp/runtime/test/transform/tile/intdo.f90 new file mode 100644 index 0000000000000..40e4c2c53f89c --- /dev/null +++ b/openmp/runtime/test/transform/tile/intdo.f90 @@ -0,0 +1,58 @@ +! This test checks lowering of OpenMP tile directive +! XFAIL: * + +! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + + +program tile_intdo + integer i, j + print *, 'do' + + !$OMP TILE SIZES(2,2) + do i=7, 18, 3 + do j=7, 19, 3 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END TILE + + print *, 'done' +end program + + +! CHECK: do + +! Complete tile +! 
CHECK-NEXT: i=7 j=7 +! CHECK-NEXT: i=7 j=10 +! CHECK-NEXT: i=10 j=7 +! CHECK-NEXT: i=10 j=10 + +! Complete tile +! CHECK-NEXT: i=7 j=13 +! CHECK-NEXT: i=7 j=16 +! CHECK-NEXT: i=10 j=13 +! CHECK-NEXT: i=10 j=16 + +! Partial tile +! CHECK-NEXT: i=7 j=19 +! CHECK-NEXT: i=10 j=19 + +! Complete tile +! CHECK-NEXT: i=13 j=7 +! CHECK-NEXT: i=13 j=10 +! CHECK-NEXT: i=16 j=7 +! CHECK-NEXT: i=16 j=10 + +! Complete tile +! CHECK-NEXT: i=13 j=13 +! CHECK-NEXT: i=13 j=16 +! CHECK-NEXT: i=16 j=13 +! CHECK-NEXT: i=16 j=16 + +! Partial tile +! CHECK-NEXT: i=13 j=19 +! CHECK-NEXT: i=16 j=19 + +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intdo.f90 b/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intdo.f90 new file mode 100644 index 0000000000000..66bddf30e045a --- /dev/null +++ b/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intdo.f90 @@ -0,0 +1,59 @@ +! This test checks lowering of OpenMP tile directive + +! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + + +program tile_wsloop_collapse_intdo + integer i, j + print *, 'do' + + !$OMP PARALLEL DO COLLAPSE(2) NUM_THREADS(1) + !$OMP TILE SIZES(2,2) + do i=7, 18, 3 + do j=7, 19, 3 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END TILE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do + +! Complete tile +! CHECK-NEXT: i=7 j=7 +! CHECK-NEXT: i=7 j=10 +! CHECK-NEXT: i=10 j=7 +! CHECK-NEXT: i=10 j=10 + +! Complete tile +! CHECK-NEXT: i=7 j=13 +! CHECK-NEXT: i=7 j=16 +! CHECK-NEXT: i=10 j=13 +! CHECK-NEXT: i=10 j=16 + +! Partial tile +! CHECK-NEXT: i=7 j=19 +! CHECK-NEXT: i=10 j=19 + +! Complete tile +! CHECK-NEXT: i=13 j=7 +! CHECK-NEXT: i=13 j=10 +! CHECK-NEXT: i=16 j=7 +! CHECK-NEXT: i=16 j=10 + +! Complete tile +! CHECK-NEXT: i=13 j=13 +! CHECK-NEXT: i=13 j=16 +! CHECK-NEXT: i=16 j=13 +! CHECK-NEXT: i=16 j=16 + +! Partial tile +! CHECK-NEXT: i=13 j=19 +! 
CHECK-NEXT: i=16 j=19 + +! CHECK-NEXT: done From 0a5a05cf503fd1dde25ba91ca49dbeb55be80884 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 17:26:40 +0200 Subject: [PATCH 41/64] systematic testing --- .../distribute-parallel-wsloop-intdo.f90 | 37 +++++++++++++++++++ .../distribute-parallel-wsloop-simd-intdo.f90 | 37 +++++++++++++++++++ .../interchange/distribute-simd-intdo.f90 | 37 +++++++++++++++++++ .../test/transform/interchange/intdo.f90 | 31 ++++++++++++++++ .../interchange/parallel-wsloop-intdo.f90 | 33 +++++++++++++++++ .../interchange/taskloop-simd-intdo.f90 | 33 +++++++++++++++++ .../transform/interchange/wsloop-intdo.f90 | 33 +++++++++++++++++ .../interchange/wsloop-simd-intdo.f90 | 33 +++++++++++++++++ 8 files changed, 274 insertions(+) create mode 100644 openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-simd-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/distribute-simd-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/wsloop-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/wsloop-simd-intdo.f90 diff --git a/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..b5ef5214f5064 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-intdo.f90 @@ -0,0 +1,37 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TEAMS NUM_TEAMS(1) + + !$OMP DISTRIBUTE PARALLEL DO SCHEDULE(static,2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DISTRIBUTE PARALLEL DO + + !$OMP END TEAMS + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-simd-intdo.f90 new file mode 100644 index 0000000000000..2a192cad017a6 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-simd-intdo.f90 @@ -0,0 +1,37 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TEAMS NUM_TEAMS(1) + + !$OMP DISTRIBUTE PARALLEL DO SIMD SCHEDULE(static,2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DISTRIBUTE PARALLEL DO SIMD + + !$OMP END TEAMS + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! 
CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/distribute-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/distribute-simd-intdo.f90 new file mode 100644 index 0000000000000..fce62b7f3ccda --- /dev/null +++ b/openmp/runtime/test/transform/interchange/distribute-simd-intdo.f90 @@ -0,0 +1,37 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TEAMS NUM_TEAMS(1) + + !$OMP DISTRIBUTE SIMD + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DISTRIBUTE SIMD + + !$OMP END TEAMS + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/intdo.f90 b/openmp/runtime/test/transform/interchange/intdo.f90 new file mode 100644 index 0000000000000..fe6820f41dba6 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/intdo.f90 @@ -0,0 +1,31 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_intdo + integer :: i, j + print *, 'do' + + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! 
CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..cfa3bddf5c8d5 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 new file mode 100644 index 0000000000000..2e8293dd6bec6 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TASKLOOP SIMD SCHEDULE(static,2) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TASKLOOP SIMD + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! 
CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/wsloop-intdo.f90 new file mode 100644 index 0000000000000..32b1b87a9e859 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP DO SCHEDULE(static,2) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/wsloop-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/wsloop-simd-intdo.f90 new file mode 100644 index 0000000000000..56ed14b165fa3 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/wsloop-simd-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP DO SIMD SCHEDULE(static,2) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DO SIMD + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! 
CHECK-NEXT: done From e84423057ba7fdf2cb8ae9c4c13a00c31d5eaba7 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 23:17:32 +0200 Subject: [PATCH 42/64] Implement standalone interchange --- flang/include/flang/Lower/AbstractConverter.h | 5 + flang/lib/Lower/Bridge.cpp | 114 ++++ flang/lib/Lower/OpenMP/OpenMP.cpp | 568 +++++++++++++++++- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 5 + .../interchange/intdo-permutation.f90 | 42 ++ .../parallel-wsloop-intdo-firstprivate.f90 | 35 ++ .../parallel-wsloop-intdo-private.f90 | 34 ++ .../parallel-wsloop-intdo-reduction.f90 | 27 + ...teams-distribute-parallel-wsloop-intdo.f90 | 33 + .../transform/interchange/taskloop-intdo.f90 | 35 ++ .../interchange/taskloop-simd-intdo.f90 | 3 +- ...teams-distribute-parallel-wsloop-intdo.f90 | 33 + 12 files changed, 932 insertions(+), 2 deletions(-) create mode 100644 openmp/runtime/test/transform/interchange/intdo-permutation.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-firstprivate.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-reduction.f90 create mode 100644 openmp/runtime/test/transform/interchange/target-teams-distribute-parallel-wsloop-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/taskloop-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/teams-distribute-parallel-wsloop-intdo.f90 diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index 8e9de418e1b7e..e7c1cb92c445e 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -193,6 +193,9 @@ class AbstractConverter { std::unique_ptr expression, mlir::Type eleTy) = 0; + + + //===--------------------------------------------------------------------===// // Expressions 
//===--------------------------------------------------------------------===// @@ -381,6 +384,8 @@ class AbstractConverter { virtual mlir::StateStack &getStateStack() = 0; + virtual void genPermutatedLoops( llvm:: ArrayRef doStmts, Fortran:: lower::pft::Evaluation *innermostDo) = 0; + private: /// Options controlling lowering behavior. const Fortran::lower::LoweringOptions &loweringOptions; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 6125ea9153662..0f21235872edb 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2187,6 +2187,120 @@ class FirConverter : public Fortran::lower::AbstractConverter { // so no clean-up needs to be generated for these entities. } + void genPermutatedLoops( llvm:: ArrayRef doStmts, Fortran:: lower::pft::Evaluation *innermostDo) override { + // Fortran::lower::pft::Evaluation &eval = getEval(); + // bool unstructuredContext = eval.lowerAsUnstructured(); + + llvm:: SmallVector< mlir::Block *> headerBlocks; + llvm:: SmallVector loopInfos; + + auto enterLoop = [&](Fortran:: lower::pft::Evaluation &eval) { eval.dump(); + bool unstructuredContext = eval.lowerAsUnstructured(); + + // Collect loop nest information. + // Generate begin loop code directly for infinite and while loops. + Fortran::lower::pft::Evaluation &doStmtEval = eval.getFirstNestedEvaluation(); + auto *doStmt = doStmtEval.getIf(); + const auto &loopControl = std::get>(doStmt->t); + mlir::Block *preheaderBlock = doStmtEval.block; + mlir::Block *beginBlock = preheaderBlock ? preheaderBlock : builder->getBlock(); + auto createNextBeginBlock = [&]() { + // Step beginBlock through unstructured preheader, header, and mask + // blocks, created in outermost to innermost order. + return beginBlock = beginBlock->splitBlock(beginBlock->end()); + }; + mlir::Block *headerBlock = unstructuredContext ? 
createNextBeginBlock() : nullptr; + headerBlocks.push_back(headerBlock); + mlir::Block *bodyBlock = doStmtEval.lexicalSuccessor->block; + mlir::Block *exitBlock = doStmtEval.parentConstruct->constructExit->block; + IncrementLoopNestInfo &incrementLoopNestInfo = loopInfos.emplace_back(); + const Fortran::parser::ScalarLogicalExpr *whileCondition = nullptr; + bool infiniteLoop = !loopControl.has_value(); + if (infiniteLoop) { + assert(unstructuredContext && "infinite loop must be unstructured"); + startBlock(headerBlock); + } else if ((whileCondition = + std::get_if( + &loopControl->u))) { + assert(unstructuredContext && "while loop must be unstructured"); + maybeStartBlock(preheaderBlock); // no block or empty block + startBlock(headerBlock); + genConditionalBranch(*whileCondition, bodyBlock, exitBlock); + } else if (const auto *bounds = + std::get_if( + &loopControl->u)) { + // Non-concurrent increment loop. + IncrementLoopInfo &info = incrementLoopNestInfo.emplace_back( + *bounds->name.thing.symbol, bounds->lower, bounds->upper, + bounds->step); + if (unstructuredContext) { + maybeStartBlock(preheaderBlock); + info.hasRealControl = info.loopVariableSym->GetType()->IsNumeric( + Fortran::common::TypeCategory::Real); + info.headerBlock = headerBlock; + info.bodyBlock = bodyBlock; + info.exitBlock = exitBlock; + } + } else { + llvm_unreachable("Cannot permute DO CONCURRENT"); + } + + + + // Increment loop begin code. (Infinite/while code was already generated.) 
+ if (!infiniteLoop && !whileCondition) + genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs); + }; + + + + auto leaveLoop = [&](Fortran:: lower::pft::Evaluation &eval, mlir::Block *headerBlock , IncrementLoopNestInfo &incrementLoopNestInfo) { eval.dump(); + bool unstructuredContext = eval.lowerAsUnstructured(); + + Fortran::lower::pft::Evaluation &doStmtEval = eval.getFirstNestedEvaluation(); + auto *doStmt = doStmtEval.getIf(); + + const auto &loopControl = std::get>(doStmt->t); + bool infiniteLoop = !loopControl.has_value(); + const Fortran::parser::ScalarLogicalExpr *whileCondition = std::get_if( &loopControl->u); + + auto iter = std::prev( eval.getNestedEvaluations().end()); + + // An EndDoStmt in unstructured code may start a new block. + Fortran::lower::pft::Evaluation &endDoEval = *iter; + assert(endDoEval.getIf() && "no enddo stmt"); + if (unstructuredContext) + maybeStartBlock(endDoEval.block); + + // Loop end code. + if (infiniteLoop || whileCondition) + genBranch(headerBlock); + else + genFIRIncrementLoopEnd(incrementLoopNestInfo); + + // This call may generate a branch in some contexts. + genFIR(endDoEval, unstructuredContext); + }; + + + + + + for (auto l : doStmts) enterLoop(*l); + + + // Loop body code. + // Fortran:: lower::pft::Evaluation *innermostEval = doStmts.back(); innermostEval->dump(); + bool innermostUnstructuredContext = innermostDo->lowerAsUnstructured(); + + auto iter = innermostDo->getNestedEvaluations().begin(); + for (auto end = --innermostDo->getNestedEvaluations().end(); iter != end; ++iter) + genFIR(*iter, innermostUnstructuredContext); + + for (auto &&[l,headerBlock,li] : llvm::zip_equal( doStmts,headerBlocks,loopInfos)) + leaveLoop(*l,headerBlock,li); +} + /// Generate FIR for a DO construct. 
There are six variants: /// - unstructured infinite and while loops /// - structured and unstructured increment loops diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 7b4fb649ab383..7adedd934d582 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -45,6 +45,11 @@ #include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" using namespace Fortran::lower::omp; using namespace Fortran::common::openmp; @@ -2274,6 +2279,567 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); } +static +void collectLoops( lower::pft::Evaluation &eval, + llvm::SmallVectorImpl< lower::pft::Evaluation* > &result, + int numLoops) { + + + + + + std::size_t loopVarTypeSize = 0; + lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); + for (auto i : llvm::seq(numLoops)) { + lower::pft::Evaluation *doLoop = &doConstructEval->getFirstNestedEvaluation(); + auto *doStmt = doLoop->getIf(); + assert(doStmt && "Expected do loop to be in the nested evaluation"); + const auto &loopControl = std::get>(doStmt->t); + const parser::LoopControl::Bounds *bounds = std::get_if(&loopControl->u); + assert(bounds && "Expected bounds for worksharing do loop"); + lower::StatementContext stmtCtx; + + + result.push_back(doConstructEval); + + + doConstructEval = &*std::next(doConstructEval->getNestedEvaluations().begin()); + }; +} + + +static void enterDoLoop( lower::pft::Evaluation * doStmt) { +} + + + +static void leaveDoLoop( lower::pft::Evaluation *doStmt) { +} + + + #if 0 + +// copied from Bridge.cpp +namespace { +struct IncrementLoopInfo { + template + explicit 
IncrementLoopInfo(Fortran::semantics::Symbol &sym, const T &lower, + const T &upper, const std::optional &step, + bool isConcurrent = false) + : loopVariableSym{&sym}, lowerExpr{Fortran::semantics::GetExpr(lower)}, + upperExpr{Fortran::semantics::GetExpr(upper)}, + stepExpr{Fortran::semantics::GetExpr(step)}, + isConcurrent{isConcurrent} {} + + IncrementLoopInfo(IncrementLoopInfo &&) = default; + IncrementLoopInfo &operator=(IncrementLoopInfo &&x) = default; + + bool isStructured() const { return !headerBlock; } + + mlir::Type getLoopVariableType() const { + assert(loopVariable && "must be set"); + return fir::unwrapRefType(loopVariable.getType()); + } + + bool hasLocalitySpecs() const { + return !localSymList.empty() || !localInitSymList.empty() || + !reduceSymList.empty() || !sharedSymList.empty(); + } + + // Data members common to both structured and unstructured loops. + const Fortran::semantics::Symbol *loopVariableSym; + const Fortran::lower::SomeExpr *lowerExpr; + const Fortran::lower::SomeExpr *upperExpr; + const Fortran::lower::SomeExpr *stepExpr; + const Fortran::lower::SomeExpr *maskExpr = nullptr; + bool isConcurrent; + llvm::SmallVector localSymList; + llvm::SmallVector localInitSymList; + llvm::SmallVector reduceSymList; + llvm::SmallVector reduceOperatorList; + llvm::SmallVector sharedSymList; + mlir::Value loopVariable = nullptr; + + // Data members for structured loops. + mlir::Operation *loopOp = nullptr; + + // Data members for unstructured loops. 
+ bool hasRealControl = false; + mlir::Value tripVariable = nullptr; + mlir::Value stepVariable = nullptr; + mlir::Block *headerBlock = nullptr; // loop entry and test block + mlir::Block *maskBlock = nullptr; // concurrent loop mask block + mlir::Block *bodyBlock = nullptr; // first loop body block + mlir::Block *exitBlock = nullptr; // loop exit target block +}; + +using IncrementLoopNestInfo = llvm::SmallVector; + +struct MyFirConverter : public Fortran::lower::AbstractConverter { + fir::FirOpBuilder *builder = nullptr; + Fortran::parser::CharBlock currentPosition; + + /// Return the predicate: "current block does not have a terminator branch". + bool blockIsUnterminated() { + mlir::Block *currentBlock = builder->getBlock(); + return currentBlock->empty() || !currentBlock->back().hasTrait(); + } + + /// Convert a parser CharBlock to a Location + mlir::Location toLocation(const Fortran::parser::CharBlock &cb) { + return genLocation(cb); + } + + mlir::Location toLocation() { return toLocation(currentPosition); } + + +#if 0 + static mlir::Location genLocation(Fortran::parser::SourcePosition pos, mlir::MLIRContext &ctx) { + llvm::SmallString<256> path(*pos.path); + llvm::sys::fs::make_absolute(path); + llvm::sys::path::remove_dots(path); + return mlir::FileLineColLoc::get(&ctx, path.str(), pos.line, pos.column); + } +#endif + + + void genBranch(mlir::Block *targetBlock) { + assert(targetBlock && "missing unconditional target block"); + mlir::cf::BranchOp::create(*builder, toLocation(), targetBlock); + } + + /// Unconditionally switch code insertion to a new block. + void startBlock(mlir::Block *newBlock) { + assert(newBlock && "missing block"); + // Default termination for the current block is a fallthrough branch to + // the new block. + if (blockIsUnterminated()) + genBranch(newBlock); + // Some blocks may be re/started more than once, and might not be empty. 
+ // If the new block already has (only) a terminator, set the insertion + // point to the start of the block. Otherwise set it to the end. + builder->setInsertionPointToStart(newBlock); + if (blockIsUnterminated()) + builder->setInsertionPointToEnd(newBlock); + } + + + /// Conditionally switch code insertion to a new block. + void maybeStartBlock(mlir::Block *newBlock) { + if (newBlock) + startBlock(newBlock); + } + + + void genConditionalBranch(mlir::Value cond, mlir::Block *trueTarget, + mlir::Block *falseTarget) { + assert(trueTarget && "missing conditional branch true block"); + assert(falseTarget && "missing conditional branch false block"); + mlir::Location loc = toLocation(); + mlir::Value bcc = builder->createConvert(loc, builder->getI1Type(), cond); + mlir::cf::CondBranchOp::create(*builder, loc, bcc, trueTarget, + mlir::ValueRange{}, falseTarget, + mlir::ValueRange{}); + } + + + void genConditionalBranch(mlir::Value cond, + Fortran::lower::pft::Evaluation *trueTarget, + Fortran::lower::pft::Evaluation *falseTarget) { + genConditionalBranch(cond, trueTarget->block, falseTarget->block); + } + + mlir::Value createFIRExpr(mlir::Location loc, + const Fortran::lower::SomeExpr *expr, + Fortran::lower::StatementContext &stmtCtx) { + return fir::getBase(genExprValue(*expr, stmtCtx, &loc)); + } + + + void genConditionalBranch(const Fortran::parser::ScalarLogicalExpr &expr, + mlir::Block *trueTarget, mlir::Block *falseTarget) { + Fortran::lower::StatementContext stmtCtx; + mlir::Value cond = createFIRExpr(toLocation(), Fortran::semantics::GetExpr(expr), stmtCtx); + stmtCtx.finalizeAndReset(); + genConditionalBranch(cond, trueTarget, falseTarget); + } + + + void genConditionalBranch(const Fortran::parser::ScalarLogicalExpr &expr, + Fortran::lower::pft::Evaluation *trueTarget, + Fortran::lower::pft::Evaluation *falseTarget) { + Fortran::lower::StatementContext stmtCtx; + mlir::Value cond = createFIRExpr(toLocation(), Fortran::semantics::GetExpr(expr), stmtCtx); + 
stmtCtx.finalizeAndReset(); + genConditionalBranch(cond, trueTarget->block, falseTarget->block); + } + + /// Generate the address of loop variable \p sym. + /// If \p sym is not mapped yet, allocate local storage for it. + mlir::Value genLoopVariableAddress(mlir::Location loc, + const Fortran::semantics::Symbol &sym, + bool isUnordered) { + if (!shallowLookupSymbol(sym) && + (isUnordered || + GetSymbolDSA(sym).test(Fortran::semantics::Symbol::Flag::OmpPrivate) || + GetSymbolDSA(sym).test( + Fortran::semantics::Symbol::Flag::OmpFirstPrivate) || + GetSymbolDSA(sym).test( + Fortran::semantics::Symbol::Flag::OmpLastPrivate) || + GetSymbolDSA(sym).test(Fortran::semantics::Symbol::Flag::OmpLinear))) { + // Do concurrent loop variables are not mapped yet since they are + // local to the Do concurrent scope (same for OpenMP loops). + mlir::OpBuilder::InsertPoint insPt = builder->saveInsertionPoint(); + builder->setInsertionPointToStart(builder->getAllocaBlock()); + mlir::Type tempTy = genType(sym); + mlir::Value temp = + builder->createTemporaryAlloc(loc, tempTy, toStringRef(sym.name())); + bindIfNewSymbol(sym, temp); + builder->restoreInsertionPoint(insPt); + } + auto entry = lookupSymbol(sym); + (void)entry; + assert(entry && "loop control variable must already be in map"); + Fortran::lower::StatementContext stmtCtx; + return fir::getBase( + genExprAddr(Fortran::evaluate::AsGenericExpr(sym).value(), stmtCtx)); + } + + /// Generate FIR to begin a structured or unstructured increment loop nest. 
+ void genFIRIncrementLoopBegin( + IncrementLoopNestInfo &incrementLoopNestInfo, + llvm::SmallVectorImpl &dirs) { + assert(!incrementLoopNestInfo.empty() && "empty loop nest"); + mlir::Location loc = toLocation(); + mlir::arith::IntegerOverflowFlags iofBackup{}; + + llvm::SmallVector nestLBs; + llvm::SmallVector nestUBs; + llvm::SmallVector nestSts; + llvm::SmallVector nestReduceOperands; + llvm::SmallVector nestReduceAttrs; + bool genDoConcurrent = false; + + for (IncrementLoopInfo &info : incrementLoopNestInfo) { + genDoConcurrent = info.isStructured() && info.isConcurrent; + + if (!genDoConcurrent) + info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym, info.isConcurrent); + + if (!getLoweringOptions().getIntegerWrapAround()) { + iofBackup = builder->getIntegerOverflowFlags(); + builder->setIntegerOverflowFlags( + mlir::arith::IntegerOverflowFlags::nsw); + } + + nestLBs.push_back(genControlValue(info.lowerExpr, info)); + nestUBs.push_back(genControlValue(info.upperExpr, info)); + bool isConst = true; + nestSts.push_back(genControlValue( + info.stepExpr, info, info.isStructured() ? nullptr : &isConst)); + + if (!getLoweringOptions().getIntegerWrapAround()) + builder->setIntegerOverflowFlags(iofBackup); + + // Use a temp variable for unstructured loops with non-const step. + if (!isConst) { + mlir::Value stepValue = nestSts.back(); + info.stepVariable = builder->createTemporary(loc, stepValue.getType()); + fir::StoreOp::create(*builder, loc, stepValue, info.stepVariable); + } + } + + for (auto [info, lowerValue, upperValue, stepValue] : + llvm::zip_equal(incrementLoopNestInfo, nestLBs, nestUBs, nestSts)) { + // Structured loop - generate fir.do_loop. + if (info.isStructured()) { + if (genDoConcurrent) + continue; + + // The loop variable is a doLoop op argument. 
+ mlir::Type loopVarType = info.getLoopVariableType(); + auto loopOp = fir::DoLoopOp::create( + *builder, loc, lowerValue, upperValue, stepValue, + /*unordered=*/false, + /*finalCountValue=*/true, + builder->createConvert(loc, loopVarType, lowerValue)); + info.loopOp = loopOp; + builder->setInsertionPointToStart(loopOp.getBody()); + mlir::Value loopValue = loopOp.getRegionIterArgs()[0]; + + // Update the loop variable value in case it has non-index references. + fir::StoreOp::create(*builder, loc, loopValue, info.loopVariable); + addLoopAnnotationAttr(info, dirs); + continue; + } + + // Unstructured loop preheader - initialize tripVariable and loopVariable. + mlir::Value tripCount; + if (info.hasRealControl) { + auto diff1 = + mlir::arith::SubFOp::create(*builder, loc, upperValue, lowerValue); + auto diff2 = + mlir::arith::AddFOp::create(*builder, loc, diff1, stepValue); + tripCount = + mlir::arith::DivFOp::create(*builder, loc, diff2, stepValue); + tripCount = + builder->createConvert(loc, builder->getIndexType(), tripCount); + } else { + auto diff1 = + mlir::arith::SubIOp::create(*builder, loc, upperValue, lowerValue); + auto diff2 = + mlir::arith::AddIOp::create(*builder, loc, diff1, stepValue); + tripCount = + mlir::arith::DivSIOp::create(*builder, loc, diff2, stepValue); + } + if (forceLoopToExecuteOnce) { // minimum tripCount is 1 + mlir::Value one = + builder->createIntegerConstant(loc, tripCount.getType(), 1); + auto cond = mlir::arith::CmpIOp::create( + *builder, loc, mlir::arith::CmpIPredicate::slt, tripCount, one); + tripCount = + mlir::arith::SelectOp::create(*builder, loc, cond, one, tripCount); + } + info.tripVariable = builder->createTemporary(loc, tripCount.getType()); + fir::StoreOp::create(*builder, loc, tripCount, info.tripVariable); + fir::StoreOp::create(*builder, loc, lowerValue, info.loopVariable); + + // Unstructured loop header - generate loop condition and mask. + // Note - Currently there is no way to tag a loop as a concurrent loop. 
+ startBlock(info.headerBlock); + tripCount = fir::LoadOp::create(*builder, loc, info.tripVariable); + mlir::Value zero = + builder->createIntegerConstant(loc, tripCount.getType(), 0); + auto cond = mlir::arith::CmpIOp::create( + *builder, loc, mlir::arith::CmpIPredicate::sgt, tripCount, zero); + if (info.maskExpr) { + genConditionalBranch(cond, info.maskBlock, info.exitBlock); + startBlock(info.maskBlock); + mlir::Block *latchBlock = getEval().getLastNestedEvaluation().block; + assert(latchBlock && "missing masked concurrent loop latch block"); + Fortran::lower::StatementContext stmtCtx; + mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx); + stmtCtx.finalizeAndReset(); + genConditionalBranch(maskCond, info.bodyBlock, latchBlock); + } else { + genConditionalBranch(cond, info.bodyBlock, info.exitBlock); + if (&info != &incrementLoopNestInfo.back()) // not innermost + startBlock(info.bodyBlock); // preheader block of enclosed dimension + } + if (info.hasLocalitySpecs()) { + mlir::OpBuilder::InsertPoint insertPt = builder->saveInsertionPoint(); + builder->setInsertionPointToStart(info.bodyBlock); + handleLocalitySpecs(info); + builder->restoreInsertionPoint(insertPt); + } + } + + if (genDoConcurrent) { + auto loopWrapperOp = fir::DoConcurrentOp::create(*builder, loc); + builder->setInsertionPointToStart( + builder->createBlock(&loopWrapperOp.getRegion())); + + for (IncrementLoopInfo &info : llvm::reverse(incrementLoopNestInfo)) { + info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym, + info.isConcurrent); + } + + builder->setInsertionPointToEnd(loopWrapperOp.getBody()); + auto loopOp = fir::DoConcurrentLoopOp::create( + *builder, loc, nestLBs, nestUBs, nestSts, /*loopAnnotation=*/nullptr, + /*local_vars=*/mlir::ValueRange{}, + /*local_syms=*/nullptr, /*reduce_vars=*/mlir::ValueRange{}, + /*reduce_byref=*/nullptr, /*reduce_syms=*/nullptr, + /*reduce_attrs=*/nullptr); + + llvm::SmallVector loopBlockArgTypes( + 
incrementLoopNestInfo.size(), builder->getIndexType()); + llvm::SmallVector loopBlockArgLocs( + incrementLoopNestInfo.size(), loc); + mlir::Region &loopRegion = loopOp.getRegion(); + mlir::Block *loopBlock = builder->createBlock( + &loopRegion, loopRegion.begin(), loopBlockArgTypes, loopBlockArgLocs); + builder->setInsertionPointToStart(loopBlock); + + for (auto [info, blockArg] : + llvm::zip_equal(incrementLoopNestInfo, loopBlock->getArguments())) { + info.loopOp = loopOp; + mlir::Value loopValue = + builder->createConvert(loc, info.getLoopVariableType(), blockArg); + fir::StoreOp::create(*builder, loc, loopValue, info.loopVariable); + + if (info.maskExpr) { + Fortran::lower::StatementContext stmtCtx; + mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx); + stmtCtx.finalizeAndReset(); + mlir::Value maskCondCast = + builder->createConvert(loc, builder->getI1Type(), maskCond); + auto ifOp = fir::IfOp::create(*builder, loc, maskCondCast, + /*withElseRegion=*/false); + builder->setInsertionPointToStart(&ifOp.getThenRegion().front()); + } + } + + IncrementLoopInfo &innermostInfo = incrementLoopNestInfo.back(); + + if (innermostInfo.hasLocalitySpecs()) + handleLocalitySpecs(innermostInfo); + + addLoopAnnotationAttr(innermostInfo, dirs); + } + } + + + + void genFIR( fir::FirOpBuilder *builder , lower::pft::Evaluation &eval, llvm:: ArrayRef< lower::pft::Evaluation *> doStmts) { + // setCurrentPositionAt(doConstruct); + // Fortran::lower::pft::Evaluation &eval = getEval(); + bool unstructuredContext = eval.lowerAsUnstructured(); + + + // Collect loop nest information. + // Generate begin loop code directly for infinite and while loops. + Fortran::lower::pft::Evaluation &doStmtEval = eval.getFirstNestedEvaluation(); + auto *doStmt = doStmtEval.getIf(); + const auto &loopControl = + std::get>(doStmt->t); + mlir::Block *preheaderBlock = doStmtEval.block; + mlir::Block *beginBlock = preheaderBlock ? 
preheaderBlock : builder->getBlock(); + auto createNextBeginBlock = [&]() { + // Step beginBlock through unstructured preheader, header, and mask + // blocks, created in outermost to innermost order. + return beginBlock = beginBlock->splitBlock(beginBlock->end()); + }; + mlir::Block *headerBlock = unstructuredContext ? createNextBeginBlock() : nullptr; + mlir::Block *bodyBlock = doStmtEval.lexicalSuccessor->block; + mlir::Block *exitBlock = doStmtEval.parentConstruct->constructExit->block; + IncrementLoopNestInfo incrementLoopNestInfo; + const Fortran::parser::ScalarLogicalExpr *whileCondition = nullptr; + bool infiniteLoop = !loopControl.has_value(); + if (infiniteLoop) { + assert(unstructuredContext && "infinite loop must be unstructured"); + startBlock(headerBlock); + } else if ((whileCondition = + std::get_if( + &loopControl->u))) { + assert(unstructuredContext && "while loop must be unstructured"); + maybeStartBlock(preheaderBlock); // no block or empty block + startBlock(headerBlock); + genConditionalBranch(*whileCondition, bodyBlock, exitBlock); + } else if (const auto *bounds = + std::get_if( + &loopControl->u)) { + // Non-concurrent increment loop. + IncrementLoopInfo &info = incrementLoopNestInfo.emplace_back( + *bounds->name.thing.symbol, bounds->lower, bounds->upper, + bounds->step); + if (unstructuredContext) { + maybeStartBlock(preheaderBlock); + info.hasRealControl = info.loopVariableSym->GetType()->IsNumeric( + Fortran::common::TypeCategory::Real); + info.headerBlock = headerBlock; + info.bodyBlock = bodyBlock; + info.exitBlock = exitBlock; + } + } else { + llvm_unreachable("DO CONCURRENT unsupported"); + } + + + + // Increment loop begin code. (Infinite/while code was already generated.) + if (!infiniteLoop && !whileCondition) + genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs); + + // Loop body code. 
+ auto iter = eval.getNestedEvaluations().begin(); + for (auto end = --eval.getNestedEvaluations().end(); iter != end; ++iter) + genFIR(*iter, unstructuredContext); + + // An EndDoStmt in unstructured code may start a new block. + Fortran::lower::pft::Evaluation &endDoEval = *iter; + assert(endDoEval.getIf() && "no enddo stmt"); + if (unstructuredContext) + maybeStartBlock(endDoEval.block); + + // Loop end code. + if (infiniteLoop || whileCondition) + genBranch(headerBlock); + else + genFIRIncrementLoopEnd(incrementLoopNestInfo); + + // This call may generate a branch in some contexts. + genFIR(endDoEval, unstructuredContext); + + + } + + + }; + } + + #endif + + + +static void genStandaloneInterchangeOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + lower::StatementContext &stmtCtx, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::const_iterator item) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + assert(llvm::range_size(transforms)==1 ); + auto &&transform = *transforms.begin(); + auto d = transform.id; + assert(transform.id == llvm::omp::OMPD_interchange); + auto clauses = transform.clauses; + + bool hasPermutationClause = false; + llvm::SmallVector permutation; + auto &&permutationClause = ClauseFinder::findUniqueClause< Fortran::lower::omp::clause::Permutation>(clauses); + if (permutationClause) { + permutation.reserve(permutationClause->v.size()); + for (auto &&ts : permutationClause->v) { + permutation.push_back(evaluate::ToInt64(ts).value()); + } + } else { + permutation = {2, 1}; + } + + llvm::SmallVector< lower::pft::Evaluation* > loops; + collectLoops(eval,loops, permutation.size()); + // auto innermostDo = loops.back(); + // auto innermostBody = 
&*std::next(innermostDo->getNestedEvaluations().begin()); + + // TODO: Assert this is a valid permution + llvm::SmallVector< lower::pft::Evaluation* > newLoops; + for (auto perm : permutation) { + newLoops.push_back(loops[perm - 1]); + } + + converter.genPermutatedLoops(newLoops, loops.back()); + +#if 0 +MyFirConverter converter; +converter.builder = &firOpBuilder; +converter.genFir(eval, newLoops); +#endif + + + +#if 0 + mlir::omp::LoopRelatedClauseOps loopInfo; + llvm::SmallVector iv; + collectLoopRelatedInfo(converter, loc, eval, item->clauses, loopInfo, iv); +#endif +} + + static mlir::omp::MaskedOp genMaskedOp(lower::AbstractConverter &converter, lower::SymMap &symTable, lower::StatementContext &stmtCtx, @@ -3534,7 +4100,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_interchange: - llvm_unreachable("MK: standalone interchange not implemented"); + genStandaloneInterchangeOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_workdistribute: newOp = genWorkdistributeOp(converter, symTable, semaCtx, eval, loc, queue, diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index cc57cf66dc158..6abff7c6ddc41 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -33,6 +33,11 @@ #include "llvm/ADT/TypeSwitch.h" #include "llvm/ADT/bit.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include #include #include diff --git a/openmp/runtime/test/transform/interchange/intdo-permutation.f90 b/openmp/runtime/test/transform/interchange/intdo-permutation.f90 new file mode 100644 index 
0000000000000..a8a8e7f35d018 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/intdo-permutation.f90 @@ -0,0 +1,42 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_intdo + integer :: i, j + print *, 'do' + + !$OMP INTERCHANGE PERMUTATION(2,3,1) + do i = 7, 15, 3 + do j = -1, 1, 2 + do k = 3, 1, -1 + print '("i=", I0, " j=", I0, " k=", I0)', i, j, k + end do + end do + end do + !$OMP END INTERCHANGE + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 k=3 +! CHECK-NEXT: i=10 j=-1 k=3 +! CHECK-NEXT: i=13 j=-1 k=3 +! CHECK-NEXT: i=7 j=-1 k=2 +! CHECK-NEXT: i=10 j=-1 k=2 +! CHECK-NEXT: i=13 j=-1 k=2 +! CHECK-NEXT: i=7 j=-1 k=1 +! CHECK-NEXT: i=10 j=-1 k=1 +! CHECK-NEXT: i=13 j=-1 k=1 +! CHECK-NEXT: i=7 j=1 k=3 +! CHECK-NEXT: i=10 j=1 k=3 +! CHECK-NEXT: i=13 j=1 k=3 +! CHECK-NEXT: i=7 j=1 k=2 +! CHECK-NEXT: i=10 j=1 k=2 +! CHECK-NEXT: i=13 j=1 k=2 +! CHECK-NEXT: i=7 j=1 k=1 +! CHECK-NEXT: i=10 j=1 k=1 +! CHECK-NEXT: i=13 j=1 k=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-firstprivate.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-firstprivate.f90 new file mode 100644 index 0000000000000..e53bb107bad2b --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-firstprivate.f90 @@ -0,0 +1,35 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + k = 1 + !$OMP PARALLEL DO NUM_THREADS(3) FIRSTPRIVATE(k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = k + 1 + print '("i=", I0, " j=", I0, " k=", I0)', i, j, k + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-DAG: i=7 j=-1 k=2 +! CHECK-DAG: i=10 j=-1 k=3 +! 
CHECK-DAG: i=13 j=-1 k=4 +! CHECK-DAG: i=7 j=0 k=2 +! CHECK-DAG: i=10 j=0 k=3 +! CHECK-DAG: i=13 j=0 k=4 +! CHECK-DAG: i=7 j=1 k=2 +! CHECK-DAG: i=10 j=1 k=3 +! CHECK-DAG: i=13 j=1 k=4 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private.f90 new file mode 100644 index 0000000000000..372ff573a10d2 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private.f90 @@ -0,0 +1,34 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(4) PRIVATE(k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = i + j + print '("i=", I0, " j=", I0, " k=", I0)', i, j, k + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-DAG: i=7 j=-1 k=6 +! CHECK-DAG: i=10 j=-1 k=9 +! CHECK-DAG: i=13 j=-1 k=12 +! CHECK-DAG: i=7 j=0 k=7 +! CHECK-DAG: i=10 j=0 k=10 +! CHECK-DAG: i=13 j=0 k=13 +! CHECK-DAG: i=7 j=1 k=8 +! CHECK-DAG: i=10 j=1 k=11 +! CHECK-DAG: i=13 j=1 k=14 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-reduction.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-reduction.f90 new file mode 100644 index 0000000000000..8d313becef862 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-reduction.f90 @@ -0,0 +1,27 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + k = 1 + !$OMP PARALLEL DO REDUCTION(+:k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = k + 1 + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' + print '("k=", I0)', k +end program + + +! CHECK: do +! CHECK-NEXT: done +! CHECK-NEXT: k=10 diff --git a/openmp/runtime/test/transform/interchange/target-teams-distribute-parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/target-teams-distribute-parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..81e681b55eb1d --- /dev/null +++ b/openmp/runtime/test/transform/interchange/target-teams-distribute-parallel-wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program target_teams_distribute_parallel_do + integer :: i, j + print *, 'do' + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO SCHEDULE(static,2) NUM_TEAMS(1) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 b/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 new file mode 100644 index 0000000000000..d79f92d2ad074 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 @@ -0,0 +1,35 @@ + +! XFAIL: * +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines + + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TASKLOOP + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TASKLOOP + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 index 2e8293dd6bec6..d84be9d1d7a96 100644 --- a/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 +++ b/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 @@ -1,12 +1,13 @@ ! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe ! RUN: %t.exe | FileCheck %s --match-full-lines +! XFAIL: * program interchange_wsloop_intdo integer :: i, j print *, 'do' - !$OMP TASKLOOP SIMD SCHEDULE(static,2) + !$OMP TASKLOOP SIMD !$OMP INTERCHANGE do i = 7, 15, 3 do j = -1, 1 diff --git a/openmp/runtime/test/transform/interchange/teams-distribute-parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/teams-distribute-parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..6d7fe1afdcdd5 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/teams-distribute-parallel-wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines + +program teams_distribute_parallel_do + integer :: i, j + print *, 'do' + + !$OMP TEAMS DISTRIBUTE PARALLEL DO SCHEDULE(static,2) NUM_TEAMS(1) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TEAMS DISTRIBUTE PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done From 0f6ab3997cc0275f04cf12cdad5a63cc88030fe9 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 23:20:19 +0200 Subject: [PATCH 43/64] clang-format --- flang/include/flang/Lower/AbstractConverter.h | 7 +- flang/lib/Lower/Bridge.cpp | 192 +++--- flang/lib/Lower/OpenMP/OpenMP.cpp | 586 ++---------------- 3 files changed, 150 insertions(+), 635 deletions(-) diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index e7c1cb92c445e..396123b1e1938 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -193,9 +193,6 @@ class AbstractConverter { std::unique_ptr expression, mlir::Type eleTy) = 0; - - - //===--------------------------------------------------------------------===// // Expressions //===--------------------------------------------------------------------===// @@ -384,7 +381,9 @@ class AbstractConverter { virtual mlir::StateStack &getStateStack() = 0; - virtual void genPermutatedLoops( llvm:: ArrayRef doStmts, Fortran:: lower::pft::Evaluation *innermostDo) = 0; + virtual void + genPermutatedLoops(llvm::ArrayRef doStmts, + Fortran::lower::pft::Evaluation *innermostDo) = 0; private: /// Options controlling lowering behavior. 
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 0f21235872edb..f54fc773bff6c 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2187,119 +2187,127 @@ class FirConverter : public Fortran::lower::AbstractConverter { // so no clean-up needs to be generated for these entities. } - void genPermutatedLoops( llvm:: ArrayRef doStmts, Fortran:: lower::pft::Evaluation *innermostDo) override { - // Fortran::lower::pft::Evaluation &eval = getEval(); - // bool unstructuredContext = eval.lowerAsUnstructured(); + void + genPermutatedLoops(llvm::ArrayRef doStmts, + Fortran::lower::pft::Evaluation *innermostDo) override { + // Fortran::lower::pft::Evaluation &eval = getEval(); + // bool unstructuredContext = eval.lowerAsUnstructured(); - llvm:: SmallVector< mlir::Block *> headerBlocks; - llvm:: SmallVector loopInfos; + llvm::SmallVector headerBlocks; + llvm::SmallVector loopInfos; - auto enterLoop = [&](Fortran:: lower::pft::Evaluation &eval) { eval.dump(); + auto enterLoop = [&](Fortran::lower::pft::Evaluation &eval) { + eval.dump(); bool unstructuredContext = eval.lowerAsUnstructured(); - // Collect loop nest information. - // Generate begin loop code directly for infinite and while loops. - Fortran::lower::pft::Evaluation &doStmtEval = eval.getFirstNestedEvaluation(); - auto *doStmt = doStmtEval.getIf(); - const auto &loopControl = std::get>(doStmt->t); - mlir::Block *preheaderBlock = doStmtEval.block; - mlir::Block *beginBlock = preheaderBlock ? preheaderBlock : builder->getBlock(); - auto createNextBeginBlock = [&]() { - // Step beginBlock through unstructured preheader, header, and mask - // blocks, created in outermost to innermost order. - return beginBlock = beginBlock->splitBlock(beginBlock->end()); - }; - mlir::Block *headerBlock = unstructuredContext ? 
createNextBeginBlock() : nullptr; - headerBlocks.push_back(headerBlock); - mlir::Block *bodyBlock = doStmtEval.lexicalSuccessor->block; - mlir::Block *exitBlock = doStmtEval.parentConstruct->constructExit->block; - IncrementLoopNestInfo &incrementLoopNestInfo = loopInfos.emplace_back(); - const Fortran::parser::ScalarLogicalExpr *whileCondition = nullptr; - bool infiniteLoop = !loopControl.has_value(); - if (infiniteLoop) { - assert(unstructuredContext && "infinite loop must be unstructured"); - startBlock(headerBlock); - } else if ((whileCondition = - std::get_if( - &loopControl->u))) { - assert(unstructuredContext && "while loop must be unstructured"); - maybeStartBlock(preheaderBlock); // no block or empty block - startBlock(headerBlock); - genConditionalBranch(*whileCondition, bodyBlock, exitBlock); - } else if (const auto *bounds = - std::get_if( - &loopControl->u)) { - // Non-concurrent increment loop. - IncrementLoopInfo &info = incrementLoopNestInfo.emplace_back( - *bounds->name.thing.symbol, bounds->lower, bounds->upper, - bounds->step); - if (unstructuredContext) { - maybeStartBlock(preheaderBlock); - info.hasRealControl = info.loopVariableSym->GetType()->IsNumeric( - Fortran::common::TypeCategory::Real); - info.headerBlock = headerBlock; - info.bodyBlock = bodyBlock; - info.exitBlock = exitBlock; + // Collect loop nest information. + // Generate begin loop code directly for infinite and while loops. + Fortran::lower::pft::Evaluation &doStmtEval = + eval.getFirstNestedEvaluation(); + auto *doStmt = doStmtEval.getIf(); + const auto &loopControl = + std::get>(doStmt->t); + mlir::Block *preheaderBlock = doStmtEval.block; + mlir::Block *beginBlock = + preheaderBlock ? preheaderBlock : builder->getBlock(); + auto createNextBeginBlock = [&]() { + // Step beginBlock through unstructured preheader, header, and mask + // blocks, created in outermost to innermost order. 
+ return beginBlock = beginBlock->splitBlock(beginBlock->end()); + }; + mlir::Block *headerBlock = + unstructuredContext ? createNextBeginBlock() : nullptr; + headerBlocks.push_back(headerBlock); + mlir::Block *bodyBlock = doStmtEval.lexicalSuccessor->block; + mlir::Block *exitBlock = doStmtEval.parentConstruct->constructExit->block; + IncrementLoopNestInfo &incrementLoopNestInfo = loopInfos.emplace_back(); + const Fortran::parser::ScalarLogicalExpr *whileCondition = nullptr; + bool infiniteLoop = !loopControl.has_value(); + if (infiniteLoop) { + assert(unstructuredContext && "infinite loop must be unstructured"); + startBlock(headerBlock); + } else if ((whileCondition = + std::get_if( + &loopControl->u))) { + assert(unstructuredContext && "while loop must be unstructured"); + maybeStartBlock(preheaderBlock); // no block or empty block + startBlock(headerBlock); + genConditionalBranch(*whileCondition, bodyBlock, exitBlock); + } else if (const auto *bounds = + std::get_if( + &loopControl->u)) { + // Non-concurrent increment loop. + IncrementLoopInfo &info = incrementLoopNestInfo.emplace_back( + *bounds->name.thing.symbol, bounds->lower, bounds->upper, + bounds->step); + if (unstructuredContext) { + maybeStartBlock(preheaderBlock); + info.hasRealControl = info.loopVariableSym->GetType()->IsNumeric( + Fortran::common::TypeCategory::Real); + info.headerBlock = headerBlock; + info.bodyBlock = bodyBlock; + info.exitBlock = exitBlock; + } + } else { + llvm_unreachable("Cannot permute DO CONCURRENT"); } - } else { - llvm_unreachable("Cannot permute DO CONCURRENT"); - } - - - // Increment loop begin code. (Infinite/while code was already generated.) - if (!infiniteLoop && !whileCondition) - genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs); + // Increment loop begin code. (Infinite/while code was already generated.) 
+ if (!infiniteLoop && !whileCondition) + genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs); }; + auto leaveLoop = [&](Fortran::lower::pft::Evaluation &eval, + mlir::Block *headerBlock, + IncrementLoopNestInfo &incrementLoopNestInfo) { + eval.dump(); + bool unstructuredContext = eval.lowerAsUnstructured(); + Fortran::lower::pft::Evaluation &doStmtEval = + eval.getFirstNestedEvaluation(); + auto *doStmt = doStmtEval.getIf(); - auto leaveLoop = [&](Fortran:: lower::pft::Evaluation &eval, mlir::Block *headerBlock , IncrementLoopNestInfo &incrementLoopNestInfo) { eval.dump(); - bool unstructuredContext = eval.lowerAsUnstructured(); - - Fortran::lower::pft::Evaluation &doStmtEval = eval.getFirstNestedEvaluation(); - auto *doStmt = doStmtEval.getIf(); - - const auto &loopControl = std::get>(doStmt->t); - bool infiniteLoop = !loopControl.has_value(); - const Fortran::parser::ScalarLogicalExpr *whileCondition = std::get_if( &loopControl->u); - - auto iter = std::prev( eval.getNestedEvaluations().end()); - - // An EndDoStmt in unstructured code may start a new block. - Fortran::lower::pft::Evaluation &endDoEval = *iter; - assert(endDoEval.getIf() && "no enddo stmt"); - if (unstructuredContext) - maybeStartBlock(endDoEval.block); - - // Loop end code. - if (infiniteLoop || whileCondition) - genBranch(headerBlock); - else - genFIRIncrementLoopEnd(incrementLoopNestInfo); - - // This call may generate a branch in some contexts. - genFIR(endDoEval, unstructuredContext); - }; + const auto &loopControl = + std::get>(doStmt->t); + bool infiniteLoop = !loopControl.has_value(); + const Fortran::parser::ScalarLogicalExpr *whileCondition = + std::get_if(&loopControl->u); + auto iter = std::prev(eval.getNestedEvaluations().end()); + // An EndDoStmt in unstructured code may start a new block. 
+ Fortran::lower::pft::Evaluation &endDoEval = *iter; + assert(endDoEval.getIf() && "no enddo stmt"); + if (unstructuredContext) + maybeStartBlock(endDoEval.block); + // Loop end code. + if (infiniteLoop || whileCondition) + genBranch(headerBlock); + else + genFIRIncrementLoopEnd(incrementLoopNestInfo); + // This call may generate a branch in some contexts. + genFIR(endDoEval, unstructuredContext); + }; - for (auto l : doStmts) enterLoop(*l); - + for (auto l : doStmts) + enterLoop(*l); // Loop body code. - // Fortran:: lower::pft::Evaluation *innermostEval = doStmts.back(); innermostEval->dump(); - bool innermostUnstructuredContext = innermostDo->lowerAsUnstructured(); + // Fortran:: lower::pft::Evaluation *innermostEval = doStmts.back(); + // innermostEval->dump(); + bool innermostUnstructuredContext = innermostDo->lowerAsUnstructured(); auto iter = innermostDo->getNestedEvaluations().begin(); - for (auto end = --innermostDo->getNestedEvaluations().end(); iter != end; ++iter) + for (auto end = --innermostDo->getNestedEvaluations().end(); iter != end; + ++iter) genFIR(*iter, innermostUnstructuredContext); - for (auto &&[l,headerBlock,li] : llvm::zip_equal( doStmts,headerBlocks,loopInfos)) - leaveLoop(*l,headerBlock,li); -} + for (auto &&[l, headerBlock, li] : + llvm::zip_equal(doStmts, headerBlocks, loopInfos)) + leaveLoop(*l, headerBlock, li); + } /// Generate FIR for a DO construct. 
There are six variants: /// - unstructured infinite and while loops diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 7adedd934d582..01387cbc4dc60 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2279,567 +2279,74 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); } -static -void collectLoops( lower::pft::Evaluation &eval, - llvm::SmallVectorImpl< lower::pft::Evaluation* > &result, - int numLoops) { - - - - +static void +collectLoops(lower::pft::Evaluation &eval, + llvm::SmallVectorImpl &result, + int numLoops) { std::size_t loopVarTypeSize = 0; - lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); + lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); for (auto i : llvm::seq(numLoops)) { - lower::pft::Evaluation *doLoop = &doConstructEval->getFirstNestedEvaluation(); + lower::pft::Evaluation *doLoop = + &doConstructEval->getFirstNestedEvaluation(); auto *doStmt = doLoop->getIf(); assert(doStmt && "Expected do loop to be in the nested evaluation"); - const auto &loopControl = std::get>(doStmt->t); - const parser::LoopControl::Bounds *bounds = std::get_if(&loopControl->u); + const auto &loopControl = + std::get>(doStmt->t); + const parser::LoopControl::Bounds *bounds = + std::get_if(&loopControl->u); assert(bounds && "Expected bounds for worksharing do loop"); lower::StatementContext stmtCtx; - result.push_back(doConstructEval); - - doConstructEval = &*std::next(doConstructEval->getNestedEvaluations().begin()); + doConstructEval = + &*std::next(doConstructEval->getNestedEvaluations().begin()); }; } +static void genStandaloneInterchangeOp( + Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, lower::StatementContext &stmtCtx, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location 
loc, + const ConstructQueue &queue, ConstructQueue::const_iterator item) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -static void enterDoLoop( lower::pft::Evaluation * doStmt) { -} - - - -static void leaveDoLoop( lower::pft::Evaluation *doStmt) { -} - - - #if 0 - -// copied from Bridge.cpp -namespace { -struct IncrementLoopInfo { - template - explicit IncrementLoopInfo(Fortran::semantics::Symbol &sym, const T &lower, - const T &upper, const std::optional &step, - bool isConcurrent = false) - : loopVariableSym{&sym}, lowerExpr{Fortran::semantics::GetExpr(lower)}, - upperExpr{Fortran::semantics::GetExpr(upper)}, - stepExpr{Fortran::semantics::GetExpr(step)}, - isConcurrent{isConcurrent} {} - - IncrementLoopInfo(IncrementLoopInfo &&) = default; - IncrementLoopInfo &operator=(IncrementLoopInfo &&x) = default; - - bool isStructured() const { return !headerBlock; } - - mlir::Type getLoopVariableType() const { - assert(loopVariable && "must be set"); - return fir::unwrapRefType(loopVariable.getType()); - } - - bool hasLocalitySpecs() const { - return !localSymList.empty() || !localInitSymList.empty() || - !reduceSymList.empty() || !sharedSymList.empty(); - } - - // Data members common to both structured and unstructured loops. - const Fortran::semantics::Symbol *loopVariableSym; - const Fortran::lower::SomeExpr *lowerExpr; - const Fortran::lower::SomeExpr *upperExpr; - const Fortran::lower::SomeExpr *stepExpr; - const Fortran::lower::SomeExpr *maskExpr = nullptr; - bool isConcurrent; - llvm::SmallVector localSymList; - llvm::SmallVector localInitSymList; - llvm::SmallVector reduceSymList; - llvm::SmallVector reduceOperatorList; - llvm::SmallVector sharedSymList; - mlir::Value loopVariable = nullptr; - - // Data members for structured loops. - mlir::Operation *loopOp = nullptr; - - // Data members for unstructured loops. 
- bool hasRealControl = false; - mlir::Value tripVariable = nullptr; - mlir::Value stepVariable = nullptr; - mlir::Block *headerBlock = nullptr; // loop entry and test block - mlir::Block *maskBlock = nullptr; // concurrent loop mask block - mlir::Block *bodyBlock = nullptr; // first loop body block - mlir::Block *exitBlock = nullptr; // loop exit target block -}; - -using IncrementLoopNestInfo = llvm::SmallVector; - -struct MyFirConverter : public Fortran::lower::AbstractConverter { - fir::FirOpBuilder *builder = nullptr; - Fortran::parser::CharBlock currentPosition; - - /// Return the predicate: "current block does not have a terminator branch". - bool blockIsUnterminated() { - mlir::Block *currentBlock = builder->getBlock(); - return currentBlock->empty() || !currentBlock->back().hasTrait(); - } - - /// Convert a parser CharBlock to a Location - mlir::Location toLocation(const Fortran::parser::CharBlock &cb) { - return genLocation(cb); - } - - mlir::Location toLocation() { return toLocation(currentPosition); } - - -#if 0 - static mlir::Location genLocation(Fortran::parser::SourcePosition pos, mlir::MLIRContext &ctx) { - llvm::SmallString<256> path(*pos.path); - llvm::sys::fs::make_absolute(path); - llvm::sys::path::remove_dots(path); - return mlir::FileLineColLoc::get(&ctx, path.str(), pos.line, pos.column); - } -#endif - - - void genBranch(mlir::Block *targetBlock) { - assert(targetBlock && "missing unconditional target block"); - mlir::cf::BranchOp::create(*builder, toLocation(), targetBlock); - } - - /// Unconditionally switch code insertion to a new block. - void startBlock(mlir::Block *newBlock) { - assert(newBlock && "missing block"); - // Default termination for the current block is a fallthrough branch to - // the new block. - if (blockIsUnterminated()) - genBranch(newBlock); - // Some blocks may be re/started more than once, and might not be empty. 
- // If the new block already has (only) a terminator, set the insertion - // point to the start of the block. Otherwise set it to the end. - builder->setInsertionPointToStart(newBlock); - if (blockIsUnterminated()) - builder->setInsertionPointToEnd(newBlock); - } - - - /// Conditionally switch code insertion to a new block. - void maybeStartBlock(mlir::Block *newBlock) { - if (newBlock) - startBlock(newBlock); - } - - - void genConditionalBranch(mlir::Value cond, mlir::Block *trueTarget, - mlir::Block *falseTarget) { - assert(trueTarget && "missing conditional branch true block"); - assert(falseTarget && "missing conditional branch false block"); - mlir::Location loc = toLocation(); - mlir::Value bcc = builder->createConvert(loc, builder->getI1Type(), cond); - mlir::cf::CondBranchOp::create(*builder, loc, bcc, trueTarget, - mlir::ValueRange{}, falseTarget, - mlir::ValueRange{}); - } - - - void genConditionalBranch(mlir::Value cond, - Fortran::lower::pft::Evaluation *trueTarget, - Fortran::lower::pft::Evaluation *falseTarget) { - genConditionalBranch(cond, trueTarget->block, falseTarget->block); - } - - mlir::Value createFIRExpr(mlir::Location loc, - const Fortran::lower::SomeExpr *expr, - Fortran::lower::StatementContext &stmtCtx) { - return fir::getBase(genExprValue(*expr, stmtCtx, &loc)); - } - - - void genConditionalBranch(const Fortran::parser::ScalarLogicalExpr &expr, - mlir::Block *trueTarget, mlir::Block *falseTarget) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value cond = createFIRExpr(toLocation(), Fortran::semantics::GetExpr(expr), stmtCtx); - stmtCtx.finalizeAndReset(); - genConditionalBranch(cond, trueTarget, falseTarget); - } - - - void genConditionalBranch(const Fortran::parser::ScalarLogicalExpr &expr, - Fortran::lower::pft::Evaluation *trueTarget, - Fortran::lower::pft::Evaluation *falseTarget) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value cond = createFIRExpr(toLocation(), Fortran::semantics::GetExpr(expr), stmtCtx); - 
stmtCtx.finalizeAndReset(); - genConditionalBranch(cond, trueTarget->block, falseTarget->block); - } - - /// Generate the address of loop variable \p sym. - /// If \p sym is not mapped yet, allocate local storage for it. - mlir::Value genLoopVariableAddress(mlir::Location loc, - const Fortran::semantics::Symbol &sym, - bool isUnordered) { - if (!shallowLookupSymbol(sym) && - (isUnordered || - GetSymbolDSA(sym).test(Fortran::semantics::Symbol::Flag::OmpPrivate) || - GetSymbolDSA(sym).test( - Fortran::semantics::Symbol::Flag::OmpFirstPrivate) || - GetSymbolDSA(sym).test( - Fortran::semantics::Symbol::Flag::OmpLastPrivate) || - GetSymbolDSA(sym).test(Fortran::semantics::Symbol::Flag::OmpLinear))) { - // Do concurrent loop variables are not mapped yet since they are - // local to the Do concurrent scope (same for OpenMP loops). - mlir::OpBuilder::InsertPoint insPt = builder->saveInsertionPoint(); - builder->setInsertionPointToStart(builder->getAllocaBlock()); - mlir::Type tempTy = genType(sym); - mlir::Value temp = - builder->createTemporaryAlloc(loc, tempTy, toStringRef(sym.name())); - bindIfNewSymbol(sym, temp); - builder->restoreInsertionPoint(insPt); - } - auto entry = lookupSymbol(sym); - (void)entry; - assert(entry && "loop control variable must already be in map"); - Fortran::lower::StatementContext stmtCtx; - return fir::getBase( - genExprAddr(Fortran::evaluate::AsGenericExpr(sym).value(), stmtCtx)); - } - - /// Generate FIR to begin a structured or unstructured increment loop nest. 
- void genFIRIncrementLoopBegin( - IncrementLoopNestInfo &incrementLoopNestInfo, - llvm::SmallVectorImpl &dirs) { - assert(!incrementLoopNestInfo.empty() && "empty loop nest"); - mlir::Location loc = toLocation(); - mlir::arith::IntegerOverflowFlags iofBackup{}; - - llvm::SmallVector nestLBs; - llvm::SmallVector nestUBs; - llvm::SmallVector nestSts; - llvm::SmallVector nestReduceOperands; - llvm::SmallVector nestReduceAttrs; - bool genDoConcurrent = false; - - for (IncrementLoopInfo &info : incrementLoopNestInfo) { - genDoConcurrent = info.isStructured() && info.isConcurrent; - - if (!genDoConcurrent) - info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym, info.isConcurrent); - - if (!getLoweringOptions().getIntegerWrapAround()) { - iofBackup = builder->getIntegerOverflowFlags(); - builder->setIntegerOverflowFlags( - mlir::arith::IntegerOverflowFlags::nsw); - } - - nestLBs.push_back(genControlValue(info.lowerExpr, info)); - nestUBs.push_back(genControlValue(info.upperExpr, info)); - bool isConst = true; - nestSts.push_back(genControlValue( - info.stepExpr, info, info.isStructured() ? nullptr : &isConst)); - - if (!getLoweringOptions().getIntegerWrapAround()) - builder->setIntegerOverflowFlags(iofBackup); - - // Use a temp variable for unstructured loops with non-const step. - if (!isConst) { - mlir::Value stepValue = nestSts.back(); - info.stepVariable = builder->createTemporary(loc, stepValue.getType()); - fir::StoreOp::create(*builder, loc, stepValue, info.stepVariable); - } - } - - for (auto [info, lowerValue, upperValue, stepValue] : - llvm::zip_equal(incrementLoopNestInfo, nestLBs, nestUBs, nestSts)) { - // Structured loop - generate fir.do_loop. - if (info.isStructured()) { - if (genDoConcurrent) - continue; - - // The loop variable is a doLoop op argument. 
- mlir::Type loopVarType = info.getLoopVariableType(); - auto loopOp = fir::DoLoopOp::create( - *builder, loc, lowerValue, upperValue, stepValue, - /*unordered=*/false, - /*finalCountValue=*/true, - builder->createConvert(loc, loopVarType, lowerValue)); - info.loopOp = loopOp; - builder->setInsertionPointToStart(loopOp.getBody()); - mlir::Value loopValue = loopOp.getRegionIterArgs()[0]; - - // Update the loop variable value in case it has non-index references. - fir::StoreOp::create(*builder, loc, loopValue, info.loopVariable); - addLoopAnnotationAttr(info, dirs); - continue; - } - - // Unstructured loop preheader - initialize tripVariable and loopVariable. - mlir::Value tripCount; - if (info.hasRealControl) { - auto diff1 = - mlir::arith::SubFOp::create(*builder, loc, upperValue, lowerValue); - auto diff2 = - mlir::arith::AddFOp::create(*builder, loc, diff1, stepValue); - tripCount = - mlir::arith::DivFOp::create(*builder, loc, diff2, stepValue); - tripCount = - builder->createConvert(loc, builder->getIndexType(), tripCount); - } else { - auto diff1 = - mlir::arith::SubIOp::create(*builder, loc, upperValue, lowerValue); - auto diff2 = - mlir::arith::AddIOp::create(*builder, loc, diff1, stepValue); - tripCount = - mlir::arith::DivSIOp::create(*builder, loc, diff2, stepValue); - } - if (forceLoopToExecuteOnce) { // minimum tripCount is 1 - mlir::Value one = - builder->createIntegerConstant(loc, tripCount.getType(), 1); - auto cond = mlir::arith::CmpIOp::create( - *builder, loc, mlir::arith::CmpIPredicate::slt, tripCount, one); - tripCount = - mlir::arith::SelectOp::create(*builder, loc, cond, one, tripCount); - } - info.tripVariable = builder->createTemporary(loc, tripCount.getType()); - fir::StoreOp::create(*builder, loc, tripCount, info.tripVariable); - fir::StoreOp::create(*builder, loc, lowerValue, info.loopVariable); - - // Unstructured loop header - generate loop condition and mask. - // Note - Currently there is no way to tag a loop as a concurrent loop. 
- startBlock(info.headerBlock); - tripCount = fir::LoadOp::create(*builder, loc, info.tripVariable); - mlir::Value zero = - builder->createIntegerConstant(loc, tripCount.getType(), 0); - auto cond = mlir::arith::CmpIOp::create( - *builder, loc, mlir::arith::CmpIPredicate::sgt, tripCount, zero); - if (info.maskExpr) { - genConditionalBranch(cond, info.maskBlock, info.exitBlock); - startBlock(info.maskBlock); - mlir::Block *latchBlock = getEval().getLastNestedEvaluation().block; - assert(latchBlock && "missing masked concurrent loop latch block"); - Fortran::lower::StatementContext stmtCtx; - mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx); - stmtCtx.finalizeAndReset(); - genConditionalBranch(maskCond, info.bodyBlock, latchBlock); - } else { - genConditionalBranch(cond, info.bodyBlock, info.exitBlock); - if (&info != &incrementLoopNestInfo.back()) // not innermost - startBlock(info.bodyBlock); // preheader block of enclosed dimension - } - if (info.hasLocalitySpecs()) { - mlir::OpBuilder::InsertPoint insertPt = builder->saveInsertionPoint(); - builder->setInsertionPointToStart(info.bodyBlock); - handleLocalitySpecs(info); - builder->restoreInsertionPoint(insertPt); - } - } - - if (genDoConcurrent) { - auto loopWrapperOp = fir::DoConcurrentOp::create(*builder, loc); - builder->setInsertionPointToStart( - builder->createBlock(&loopWrapperOp.getRegion())); - - for (IncrementLoopInfo &info : llvm::reverse(incrementLoopNestInfo)) { - info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym, - info.isConcurrent); - } - - builder->setInsertionPointToEnd(loopWrapperOp.getBody()); - auto loopOp = fir::DoConcurrentLoopOp::create( - *builder, loc, nestLBs, nestUBs, nestSts, /*loopAnnotation=*/nullptr, - /*local_vars=*/mlir::ValueRange{}, - /*local_syms=*/nullptr, /*reduce_vars=*/mlir::ValueRange{}, - /*reduce_byref=*/nullptr, /*reduce_syms=*/nullptr, - /*reduce_attrs=*/nullptr); - - llvm::SmallVector loopBlockArgTypes( - 
incrementLoopNestInfo.size(), builder->getIndexType()); - llvm::SmallVector loopBlockArgLocs( - incrementLoopNestInfo.size(), loc); - mlir::Region &loopRegion = loopOp.getRegion(); - mlir::Block *loopBlock = builder->createBlock( - &loopRegion, loopRegion.begin(), loopBlockArgTypes, loopBlockArgLocs); - builder->setInsertionPointToStart(loopBlock); - - for (auto [info, blockArg] : - llvm::zip_equal(incrementLoopNestInfo, loopBlock->getArguments())) { - info.loopOp = loopOp; - mlir::Value loopValue = - builder->createConvert(loc, info.getLoopVariableType(), blockArg); - fir::StoreOp::create(*builder, loc, loopValue, info.loopVariable); - - if (info.maskExpr) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx); - stmtCtx.finalizeAndReset(); - mlir::Value maskCondCast = - builder->createConvert(loc, builder->getI1Type(), maskCond); - auto ifOp = fir::IfOp::create(*builder, loc, maskCondCast, - /*withElseRegion=*/false); - builder->setInsertionPointToStart(&ifOp.getThenRegion().front()); - } - } - - IncrementLoopInfo &innermostInfo = incrementLoopNestInfo.back(); - - if (innermostInfo.hasLocalitySpecs()) - handleLocalitySpecs(innermostInfo); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + assert(llvm::range_size(transforms) == 1); + auto &&transform = *transforms.begin(); + auto d = transform.id; + assert(transform.id == llvm::omp::OMPD_interchange); + auto clauses = transform.clauses; - addLoopAnnotationAttr(innermostInfo, dirs); + bool hasPermutationClause = false; + llvm::SmallVector permutation; + auto &&permutationClause = + ClauseFinder::findUniqueClause( + clauses); + if (permutationClause) { + permutation.reserve(permutationClause->v.size()); + for (auto &&ts : permutationClause->v) { + permutation.push_back(evaluate::ToInt64(ts).value()); } + } else { + permutation = {2, 1}; } + llvm::SmallVector loops; + 
collectLoops(eval, loops, permutation.size()); - - void genFIR( fir::FirOpBuilder *builder , lower::pft::Evaluation &eval, llvm:: ArrayRef< lower::pft::Evaluation *> doStmts) { - // setCurrentPositionAt(doConstruct); - // Fortran::lower::pft::Evaluation &eval = getEval(); - bool unstructuredContext = eval.lowerAsUnstructured(); - - - // Collect loop nest information. - // Generate begin loop code directly for infinite and while loops. - Fortran::lower::pft::Evaluation &doStmtEval = eval.getFirstNestedEvaluation(); - auto *doStmt = doStmtEval.getIf(); - const auto &loopControl = - std::get>(doStmt->t); - mlir::Block *preheaderBlock = doStmtEval.block; - mlir::Block *beginBlock = preheaderBlock ? preheaderBlock : builder->getBlock(); - auto createNextBeginBlock = [&]() { - // Step beginBlock through unstructured preheader, header, and mask - // blocks, created in outermost to innermost order. - return beginBlock = beginBlock->splitBlock(beginBlock->end()); - }; - mlir::Block *headerBlock = unstructuredContext ? createNextBeginBlock() : nullptr; - mlir::Block *bodyBlock = doStmtEval.lexicalSuccessor->block; - mlir::Block *exitBlock = doStmtEval.parentConstruct->constructExit->block; - IncrementLoopNestInfo incrementLoopNestInfo; - const Fortran::parser::ScalarLogicalExpr *whileCondition = nullptr; - bool infiniteLoop = !loopControl.has_value(); - if (infiniteLoop) { - assert(unstructuredContext && "infinite loop must be unstructured"); - startBlock(headerBlock); - } else if ((whileCondition = - std::get_if( - &loopControl->u))) { - assert(unstructuredContext && "while loop must be unstructured"); - maybeStartBlock(preheaderBlock); // no block or empty block - startBlock(headerBlock); - genConditionalBranch(*whileCondition, bodyBlock, exitBlock); - } else if (const auto *bounds = - std::get_if( - &loopControl->u)) { - // Non-concurrent increment loop. 
- IncrementLoopInfo &info = incrementLoopNestInfo.emplace_back( - *bounds->name.thing.symbol, bounds->lower, bounds->upper, - bounds->step); - if (unstructuredContext) { - maybeStartBlock(preheaderBlock); - info.hasRealControl = info.loopVariableSym->GetType()->IsNumeric( - Fortran::common::TypeCategory::Real); - info.headerBlock = headerBlock; - info.bodyBlock = bodyBlock; - info.exitBlock = exitBlock; - } - } else { - llvm_unreachable("DO CONCURRENT unsupported"); - } - - - - // Increment loop begin code. (Infinite/while code was already generated.) - if (!infiniteLoop && !whileCondition) - genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs); - - // Loop body code. - auto iter = eval.getNestedEvaluations().begin(); - for (auto end = --eval.getNestedEvaluations().end(); iter != end; ++iter) - genFIR(*iter, unstructuredContext); - - // An EndDoStmt in unstructured code may start a new block. - Fortran::lower::pft::Evaluation &endDoEval = *iter; - assert(endDoEval.getIf() && "no enddo stmt"); - if (unstructuredContext) - maybeStartBlock(endDoEval.block); - - // Loop end code. - if (infiniteLoop || whileCondition) - genBranch(headerBlock); - else - genFIRIncrementLoopEnd(incrementLoopNestInfo); - - // This call may generate a branch in some contexts. 
- genFIR(endDoEval, unstructuredContext); - - + // TODO: Assert this is a valid permution + llvm::SmallVector newLoops; + for (auto perm : permutation) { + newLoops.push_back(loops[perm - 1]); } - - }; - } - - #endif - - - -static void genStandaloneInterchangeOp(Fortran::lower::AbstractConverter &converter, - Fortran::lower::SymMap &symTable, - lower::StatementContext &stmtCtx, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - - auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); - auto transforms = llvm::make_range(q.end(), queue.end()); - assert(llvm::range_size(transforms)==1 ); - auto &&transform = *transforms.begin(); - auto d = transform.id; - assert(transform.id == llvm::omp::OMPD_interchange); - auto clauses = transform.clauses; - - bool hasPermutationClause = false; - llvm::SmallVector permutation; - auto &&permutationClause = ClauseFinder::findUniqueClause< Fortran::lower::omp::clause::Permutation>(clauses); - if (permutationClause) { - permutation.reserve(permutationClause->v.size()); - for (auto &&ts : permutationClause->v) { - permutation.push_back(evaluate::ToInt64(ts).value()); - } - } else { - permutation = {2, 1}; - } - - llvm::SmallVector< lower::pft::Evaluation* > loops; - collectLoops(eval,loops, permutation.size()); - // auto innermostDo = loops.back(); - // auto innermostBody = &*std::next(innermostDo->getNestedEvaluations().begin()); - - // TODO: Assert this is a valid permution - llvm::SmallVector< lower::pft::Evaluation* > newLoops; - for (auto perm : permutation) { - newLoops.push_back(loops[perm - 1]); - } - - converter.genPermutatedLoops(newLoops, loops.back()); - -#if 0 -MyFirConverter converter; -converter.builder = &firOpBuilder; -converter.genFir(eval, newLoops); -#endif - - - -#if 0 - mlir::omp::LoopRelatedClauseOps 
loopInfo; - llvm::SmallVector iv; - collectLoopRelatedInfo(converter, loc, eval, item->clauses, loopInfo, iv); -#endif + converter.genPermutatedLoops(newLoops, loops.back()); } - static mlir::omp::MaskedOp genMaskedOp(lower::AbstractConverter &converter, lower::SymMap &symTable, lower::StatementContext &stmtCtx, @@ -4100,7 +3607,8 @@ static void genOMPDispatch(lower::AbstractConverter &converter, genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_interchange: - genStandaloneInterchangeOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); + genStandaloneInterchangeOp(converter, symTable, stmtCtx, semaCtx, eval, loc, + queue, item); break; case llvm::omp::Directive::OMPD_workdistribute: newOp = genWorkdistributeOp(converter, symTable, semaCtx, eval, loc, queue, From d77da88cc6f786e62d6ad074a62f09db9ead32ee Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 23:32:46 +0200 Subject: [PATCH 44/64] Add tests --- .../parallel-wsloop-intdo-lastprivate.f90 | 28 ++++++++++++++++ .../parallel-wsloop-intdo-private-i.f90 | 33 +++++++++++++++++++ .../parallel-wsloop-intdo-private-j.f90 | 33 +++++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-lastprivate.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-i.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-j.f90 diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-lastprivate.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-lastprivate.f90 new file mode 100644 index 0000000000000..42d7032bd2184 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-lastprivate.f90 @@ -0,0 +1,28 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines +! XFAIL: * + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + + !$OMP PARALLEL DO LASTPRIVATE(k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = i*10 + j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' + print '("k=", I0)', k +end program + + +! CHECK: do +! CHECK-NEXT: done +! CHECK-NEXT: k=131 diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-i.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-i.f90 new file mode 100644 index 0000000000000..76928ce93577e --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-i.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) PRIVATE(i) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-j.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-j.f90 new file mode 100644 index 0000000000000..a679c921e9660 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-j.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) PRIVATE(j) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done From 23778317aa35a031cdf65ad337e73c3565cd2a00 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 23:46:18 +0200 Subject: [PATCH 45/64] Reduce diff --- flang/lib/Lower/OpenMP/OpenMP.cpp | 6 ------ flang/lib/Lower/OpenMP/Utils.cpp | 16 +++++++--------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 01387cbc4dc60..37d2a7dc38cb7 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -45,16 +45,10 @@ #include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" using namespace Fortran::lower::omp; using namespace Fortran::common::openmp; using namespace Fortran::utils::openmp; -using namespace Fortran::semantics; //===----------------------------------------------------------------------===// // Code generation helper functions diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 91e40f6a0f5e6..e6559d27c84ba 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -25,19 +25,17 @@ #include #include #include -#include #include 
+#include #include #include -using namespace Fortran::semantics; - template -MaybeIntExpr EvaluateIntExpr(SemanticsContext &context, const T &expr) { - if (MaybeExpr maybeExpr{ +Fortran::semantics::MaybeIntExpr EvaluateIntExpr(Fortran::semantics::SemanticsContext &context, const T &expr) { + if (Fortran::semantics::MaybeExpr maybeExpr{ Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { - if (auto *intExpr{Fortran::evaluate::UnwrapExpr(*maybeExpr)}) { + if (auto *intExpr{Fortran::evaluate::UnwrapExpr(*maybeExpr)}) { return std::move(*intExpr); } } @@ -45,7 +43,7 @@ MaybeIntExpr EvaluateIntExpr(SemanticsContext &context, const T &expr) { } template -std::optional EvaluateInt64(SemanticsContext &context, +std::optional EvaluateInt64(Fortran::semantics::SemanticsContext &context, const T &expr) { return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); } @@ -602,7 +600,7 @@ static void convertLoopBounds(lower::AbstractConverter &converter, /// Contains a loop construct with an inner tiling construct. void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &tileSizes, SemanticsContext &semaCtx) { + llvm::SmallVectorImpl &tileSizes,Fortran::semantics:: SemanticsContext &semaCtx) { if (!ompCons) return; @@ -642,7 +640,7 @@ void collectTileSizesFromOpenMPConstruct( /// Contains a loop construct with an inner tiling construct. 
void collectPermutationFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &permutation, SemanticsContext &semaCtx) { + llvm::SmallVectorImpl &permutation,Fortran::semantics:: SemanticsContext &semaCtx) { if (!ompCons) return; From 8ebe17b48f9611b4730c974a29814156cc05f03f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 23:47:32 +0200 Subject: [PATCH 46/64] Reduce diff from trunk --- flang/lib/Lower/Bridge.cpp | 4 -- flang/lib/Lower/OpenMP/OpenMP.cpp | 20 +----- flang/lib/Lower/OpenMP/Utils.cpp | 17 +++-- .../mlir/Dialect/OpenMP/OpenMPClauses.td | 19 ------ mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 7 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 64 ++++++++----------- 6 files changed, 42 insertions(+), 89 deletions(-) diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index f54fc773bff6c..0e057d928d345 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2197,7 +2197,6 @@ class FirConverter : public Fortran::lower::AbstractConverter { llvm::SmallVector loopInfos; auto enterLoop = [&](Fortran::lower::pft::Evaluation &eval) { - eval.dump(); bool unstructuredContext = eval.lowerAsUnstructured(); // Collect loop nest information. @@ -2260,7 +2259,6 @@ class FirConverter : public Fortran::lower::AbstractConverter { auto leaveLoop = [&](Fortran::lower::pft::Evaluation &eval, mlir::Block *headerBlock, IncrementLoopNestInfo &incrementLoopNestInfo) { - eval.dump(); bool unstructuredContext = eval.lowerAsUnstructured(); Fortran::lower::pft::Evaluation &doStmtEval = @@ -2295,8 +2293,6 @@ class FirConverter : public Fortran::lower::AbstractConverter { enterLoop(*l); // Loop body code. 
- // Fortran:: lower::pft::Evaluation *innermostEval = doStmts.back(); - // innermostEval->dump(); bool innermostUnstructuredContext = innermostDo->lowerAsUnstructured(); auto iter = innermostDo->getNestedEvaluations().begin(); diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 37d2a7dc38cb7..a1caabbe8cf55 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1202,10 +1202,6 @@ struct OpWithBodyGenInfo { static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - int a = 0; - if (a) { - op.dump(); - } fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); auto insertMarker = [](fir::FirOpBuilder &builder) { @@ -1348,10 +1344,6 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, // wrapper region. mlir::Operation *privatizationBottomLevelOp = &op; if (auto loopNest = llvm::dyn_cast(op)) { - int b = 0; - if (b) { - loopNest.dump(); - } llvm::SmallVector wrappers; loopNest.gatherWrappers(wrappers); if (!wrappers.empty()) @@ -2034,7 +2026,6 @@ static mlir::omp::LoopNestOp genLoopNestOp( switch (d) { case llvm::omp::OMPD_interchange: { - bool hasPermutationClause = false; llvm::SmallVector permutation; auto &&permutationClause = ClauseFinder::findUniqueClause< @@ -2277,10 +2268,8 @@ static void collectLoops(lower::pft::Evaluation &eval, llvm::SmallVectorImpl &result, int numLoops) { - - std::size_t loopVarTypeSize = 0; lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); - for (auto i : llvm::seq(numLoops)) { + for ([[maybe_unused]] auto i : llvm::seq(numLoops)) { lower::pft::Evaluation *doLoop = &doConstructEval->getFirstNestedEvaluation(); auto *doStmt = doLoop->getIf(); @@ -2305,17 +2294,13 @@ static void genStandaloneInterchangeOp( Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, mlir::Location loc, 
const ConstructQueue &queue, ConstructQueue::const_iterator item) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); auto transforms = llvm::make_range(q.end(), queue.end()); assert(llvm::range_size(transforms) == 1); auto &&transform = *transforms.begin(); - auto d = transform.id; assert(transform.id == llvm::omp::OMPD_interchange); auto clauses = transform.clauses; - bool hasPermutationClause = false; llvm::SmallVector permutation; auto &&permutationClause = ClauseFinder::findUniqueClause( @@ -3415,7 +3400,6 @@ static mlir::omp::TaskloopOp genCompositeTaskloopSimd( lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); - auto transforms = llvm::make_range(q.end(), queue.end()); assert(llvm::range_size(q) == 2 && "Invalid leaf constructs"); @@ -4072,7 +4056,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, const Fortran::parser::OpenMPLoopConstruct &x = ompNestedLoopCons->value(); const Fortran::parser::OmpBeginLoopDirective &y = std::get<0>(x.t); - const Fortran::parser::OmpClauseList &clauseList = std::get<1>(y.t); + // const Fortran::parser::OmpClauseList &clauseList = std::get<1>(y.t); llvm::omp::Directive nestedDirective = parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; List nestedClauses = diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index e6559d27c84ba..30fff48181d73 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -32,10 +32,13 @@ #include template -Fortran::semantics::MaybeIntExpr EvaluateIntExpr(Fortran::semantics::SemanticsContext &context, const T &expr) { +Fortran::semantics::MaybeIntExpr +EvaluateIntExpr(Fortran::semantics::SemanticsContext &context, const T &expr) { if (Fortran::semantics::MaybeExpr maybeExpr{ 
Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { - if (auto *intExpr{Fortran::evaluate::UnwrapExpr(*maybeExpr)}) { + if (auto *intExpr{ + Fortran::evaluate::UnwrapExpr( + *maybeExpr)}) { return std::move(*intExpr); } } @@ -43,8 +46,8 @@ Fortran::semantics::MaybeIntExpr EvaluateIntExpr(Fortran::semantics::SemanticsCo } template -std::optional EvaluateInt64(Fortran::semantics::SemanticsContext &context, - const T &expr) { +std::optional +EvaluateInt64(Fortran::semantics::SemanticsContext &context, const T &expr) { return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); } @@ -600,7 +603,8 @@ static void convertLoopBounds(lower::AbstractConverter &converter, /// Contains a loop construct with an inner tiling construct. void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &tileSizes,Fortran::semantics:: SemanticsContext &semaCtx) { + llvm::SmallVectorImpl &tileSizes, + Fortran::semantics::SemanticsContext &semaCtx) { if (!ompCons) return; @@ -640,7 +644,8 @@ void collectTileSizesFromOpenMPConstruct( /// Contains a loop construct with an inner tiling construct. 
void collectPermutationFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &permutation,Fortran::semantics:: SemanticsContext &semaCtx) { + llvm::SmallVectorImpl &permutation, + Fortran::semantics::SemanticsContext &semaCtx) { if (!ompCons) return; diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index 9102a4320c578..5f40abe62a0f6 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -1418,25 +1418,6 @@ class OpenMP_TileSizesClauseSkip< def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; - -//===----------------------------------------------------------------------===// -// V6.0: [xx.x] `permutation` clause -//===----------------------------------------------------------------------===// - -class OpenMP_PermutationClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - BoolAttr:$interchangeEnabled, - OptionalAttr:$permutation - ); -} - -def OpenMP_PermutationClause : OpenMP_PermutationClauseSkip<>; - - //===----------------------------------------------------------------------===// // V5.2: [12.1] `untied` clause //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 6abff7c6ddc41..aa88b9e8eef5a 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -33,11 +33,6 @@ #include "llvm/ADT/TypeSwitch.h" #include "llvm/ADT/bit.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" #include #include 
#include @@ -2985,7 +2980,7 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { return failure(); if (value > 1) result.addAttribute( - "num_collapse", + "collapse_num_loops", IntegerAttr::get(parser.getBuilder().getI64Type(), value)); // Parse tiles diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 6600747ad85e8..2ab6bb0a73200 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3044,53 +3044,45 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy afterIP = loopInfos.front()->getAfterIP(); - llvm::CanonicalLoopInfo *NewTopLoopInfo = nullptr; - - // Do tiling + // Do tiling. if (const auto &tiles = loopOp.getTileSizes()) { - llvm::Type *IVType = loopInfos.front()->getIndVarType(); - SmallVector TileSizes; + llvm::Type *ivType = loopInfos.front()->getIndVarType(); + SmallVector tileSizes; for (auto tile : tiles.value()) { - llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile); - TileSizes.push_back(TileVal); + llvm::Value *tileVal = llvm::ConstantInt::get(ivType, tile); + tileSizes.push_back(tileVal); } - std::vector NewLoops = - ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); + std::vector newLoops = + ompBuilder->tileLoops(ompLoc.DL, loopInfos, tileSizes); // Update afterIP to get the correct insertion point after // tiling. - llvm::BasicBlock *AfterBB = NewLoops.front()->getAfter(); - llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor(); - afterIP = {AfterAfterBB, AfterAfterBB->begin()}; - NewTopLoopInfo = NewLoops[0]; + llvm::BasicBlock *afterBB = newLoops.front()->getAfter(); + llvm::BasicBlock *afterAfterBB = afterBB->getSingleSuccessor(); + afterIP = {afterAfterBB, afterAfterBB->begin()}; - // Update the loop infos + // Update the loop infos. 
loopInfos.clear(); - for (const auto &newLoop : NewLoops) + for (const auto &newLoop : newLoops) loopInfos.push_back(newLoop); - } // Tiling done - - // Do collapse - if (const auto &numCollapse = loopOp.getCollapseNumLoops()) { - SmallVector collapseLoopInfos( - loopInfos.begin(), loopInfos.begin() + (numCollapse)); - - auto newLoopInfo = - ompBuilder->collapseLoops(ompLoc.DL, collapseLoopInfos, {}); - NewTopLoopInfo = newLoopInfo; - } // Collapse done - - // Update the stack frame created for this loop to point to the resulting - // loop after applying transformations. - if (NewTopLoopInfo) { - moduleTranslation.stackWalk( - [&](OpenMPLoopInfoStackFrame &frame) { - frame.loopInfo = NewTopLoopInfo; - return WalkResult::interrupt(); - }); - } + } // Tiling done. + + // Do collapse. + const auto &numCollapse = loopOp.getCollapseNumLoops(); + SmallVector collapseLoopInfos( + loopInfos.begin(), loopInfos.begin() + (numCollapse)); + + auto newTopLoopInfo = + ompBuilder->collapseLoops(ompLoc.DL, collapseLoopInfos, {}); + + assert(newTopLoopInfo && "New top loop information is missing"); + moduleTranslation.stackWalk( + [&](OpenMPLoopInfoStackFrame &frame) { + frame.loopInfo = newTopLoopInfo; + return WalkResult::interrupt(); + }); // Continue building IR after the loop. 
Note that the LoopInfo returned by // `collapseLoops` points inside the outermost loop and is intended for From a646a1d795267d4bf759f1f711851ead3e029ec9 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 12 Sep 2025 00:23:12 +0200 Subject: [PATCH 47/64] Add tests --- flang/lib/Lower/OpenMP/Utils.cpp | 1 - .../parallel-wsloop-intdo-collapse.f90 | 33 +++++++++++++++++++ .../parallel-wsloop-intdo-permutation.f90 | 33 +++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-collapse.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-permutation.f90 diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 30fff48181d73..7aa8c30ebd679 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -607,7 +607,6 @@ void collectTileSizesFromOpenMPConstruct( Fortran::semantics::SemanticsContext &semaCtx) { if (!ompCons) return; - if (auto *ompLoop{std::get_if(&ompCons->u)}) { const auto &nestedOptional = std::get>(ompLoop->t); diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-collapse.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-collapse.f90 new file mode 100644 index 0000000000000..4285edaa775b8 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-collapse.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO COLLAPSE(2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! 
CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-permutation.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-permutation.f90 new file mode 100644 index 0000000000000..e52389f2448e4 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-permutation.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) + !$OMP INTERCHANGE PERMUTATION(2,1) + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! 
CHECK-NEXT: done From 9c7155ae7a8471ba27a039bf4f4a2f7faed32052 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 12 Sep 2025 00:29:26 +0200 Subject: [PATCH 48/64] Reduce diff --- flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 7 ++++ flang/lib/Lower/OpenMP/ClauseProcessor.h | 2 ++ flang/lib/Lower/OpenMP/OpenMP.cpp | 39 ++-------------------- flang/lib/Lower/OpenMP/Utils.cpp | 3 +- flang/lib/Semantics/resolve-directives.cpp | 1 - 5 files changed, 14 insertions(+), 38 deletions(-) diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 23f0ca14e931d..96e21872e4643 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -522,6 +522,13 @@ bool ClauseProcessor::processProcBind( return false; } +bool ClauseProcessor::processTileSizes( + lower::pft::Evaluation &eval, mlir::omp::LoopNestOperands &result) const { + auto *ompCons{eval.getIf()}; + collectTileSizesFromOpenMPConstruct(ompCons, result.tileSizes, semaCtx); + return !result.tileSizes.empty(); +} + bool ClauseProcessor::processSafelen( mlir::omp::SafelenClauseOps &result) const { if (auto *clause = findUniqueClause()) { diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index c46bdb348a3ef..01ac15a1ffc71 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -98,6 +98,8 @@ class ClauseProcessor { bool processPriority(lower::StatementContext &stmtCtx, mlir::omp::PriorityClauseOps &result) const; bool processProcBind(mlir::omp::ProcBindClauseOps &result) const; + bool processTileSizes(lower::pft::Evaluation &eval, + mlir::omp::LoopNestOperands &result) const; bool processSafelen(mlir::omp::SafelenClauseOps &result) const; bool processSchedule(lower::StatementContext &stmtCtx, mlir::omp::ScheduleClauseOps &result) const; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 
a1caabbe8cf55..d301ceec555e0 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -406,7 +406,6 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, return; const parser::OmpClauseList *beginClauseList = nullptr; - const parser::OmpClauseList *middleClauseList = nullptr; const parser::OmpClauseList *endClauseList = nullptr; common::visit( common::visitors{ @@ -421,30 +420,6 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, beginClauseList = &std::get(beginDirective.t); - // For now we check if there is an inner OpenMPLoopConstruct, and - // extract the size clause from there - const auto &nestedOptional = - std::get>( - ompConstruct.t); - assert(nestedOptional.has_value() && - "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = - std::get_if>( - &(nestedOptional.value())); - if (innerConstruct) { - const auto &innerLoopConstruct = innerConstruct->value(); - const auto &innerBegin = - std::get( - innerLoopConstruct.t); - const auto &innerDirective = - std::get(innerBegin.t); - if (innerDirective.v == llvm::omp::Directive::OMPD_tile || - innerDirective.v == - llvm::omp::Directive::OMPD_interchange) { - middleClauseList = - &std::get(innerBegin.t); - } - } if (auto &endDirective = std::get>( ompConstruct.t)) { @@ -458,9 +433,6 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, assert(beginClauseList && "expected begin directive"); clauses.append(makeClauses(*beginClauseList, semaCtx)); - if (middleClauseList) - clauses.append(makeClauses(*middleClauseList, semaCtx)); - if (endClauseList) clauses.append(makeClauses(*endClauseList, semaCtx)); }; @@ -1626,11 +1598,7 @@ genLoopNestClauses(lower::AbstractConverter &converter, } } - llvm::SmallVector sizeValues; - auto *ompCons{eval.getIf()}; - collectTileSizesFromOpenMPConstruct(ompCons, sizeValues, semaCtx); - if (sizeValues.size() > 0) - clauseOps.tileSizes = sizeValues; + 
cp.processTileSizes(eval, clauseOps); } static void genLoopClauses( @@ -2009,8 +1977,7 @@ static mlir::omp::LoopNestOp genLoopNestOp( return llvm::SmallVector(iv); }; - uint64_t nestValue = getCollapseValue( - item->clauses); // MK: Should be number of affected loops? + uint64_t nestValue = getCollapseValue(item->clauses); nestValue = nestValue < iv.size() ? iv.size() : nestValue; auto *nestedEval = getCollapsedLoopEval(eval, nestValue); @@ -4065,7 +4032,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: // Skip OMPD_tile since the tile sizes will be retrieved when - // generating the omp.looop_nest op. + // generating the omp.loop_nest op. break; case llvm::omp::Directive::OMPD_interchange: { ConstructQueue nestedQueue{buildConstructQueue( diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 7aa8c30ebd679..464cb46e59cf8 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -626,7 +626,7 @@ void collectTileSizesFromOpenMPConstruct( // Get the size values from parse tree and convert to a vector const auto &innerClauseList{ std::get(innerBegin.t)}; - for (const auto &clause : innerClauseList.v) + for (const auto &clause : innerClauseList.v) { if (const auto tclause{ std::get_if(&clause.u)}) { for (auto &tval : tclause->v) { @@ -634,6 +634,7 @@ void collectTileSizesFromOpenMPConstruct( tileSizes.push_back(*v); } } + } } } } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 00d960914de31..649d97e081ac5 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2157,7 +2157,6 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( std::get_if(&clause.u)}) { levels.push_back(tclause->v.size()); clauses.push_back(&clause); - // llvm_unreachable("MK: fetch permute depth"); return; } } From 
ac411583a86ae5dc1216da470eaf9365b03a35f4 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 12 Sep 2025 00:59:38 +0200 Subject: [PATCH 49/64] Reduce diff size --- flang/lib/Lower/OpenMP/Utils.cpp | 12 +++++------- flang/lib/Lower/OpenMP/Utils.h | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 464cb46e59cf8..035ffdaced2e7 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -681,12 +681,12 @@ void collectPermutationFromOpenMPConstruct( } } -bool collectLoopRelatedInfo( +int64_t collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { - bool found = false; + int64_t numCollapse = 1; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // Collect the loops to collapse. @@ -699,10 +699,10 @@ bool collectLoopRelatedInfo( if (auto *clause = ClauseFinder::findUniqueClause(clauses)) { collapseValue = evaluate::ToInt64(clause->v).value(); - found = true; + numCollapse = collapseValue; } - // Collect sizes from tile directive if present + // Collect sizes from tile directive if present. 
std::int64_t sizesLengthValue = 0l; std::int64_t permutationLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { @@ -729,7 +729,6 @@ bool collectLoopRelatedInfo( if (const auto tclause{ std::get_if(&clause.u)}) { sizesLengthValue = tclause->v.size(); - found = true; } } @@ -741,7 +740,6 @@ bool collectLoopRelatedInfo( if (const auto tclause{ std::get_if(&clause.u)}) { permutationLengthValue = tclause->v.size(); - found = true; } } // default: permution(2,1) @@ -791,7 +789,7 @@ bool collectLoopRelatedInfo( convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); - return found; + return numCollapse; } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index aea882dfb7dc6..a6b6e16bbbc8b 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -159,7 +159,7 @@ void genObjectList(const ObjectList &objects, void lastprivateModifierNotSupported(const omp::clause::Lastprivate &lastp, mlir::Location loc); -bool collectLoopRelatedInfo( +int64_t collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, From 7a3c46c4ac200e6d4228f62d8603612e26522873 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 12 Sep 2025 01:05:49 +0200 Subject: [PATCH 50/64] Reduce diff size --- flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 11 ++++++++--- flang/lib/Lower/OpenMP/ClauseProcessor.h | 3 ++- flang/lib/Lower/OpenMP/OpenMP.cpp | 8 ++++---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 96e21872e4643..a96884f5680ba 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -273,10 +273,15 @@ bool ClauseProcessor::processCancelDirectiveName( bool ClauseProcessor::processCollapse( mlir::Location currentLocation, 
lower::pft::Evaluation &eval, - mlir::omp::LoopRelatedClauseOps &result, + mlir::omp::LoopRelatedClauseOps &loopResult, + mlir::omp::CollapseClauseOps &collapseResult, llvm::SmallVectorImpl &iv) const { - return collectLoopRelatedInfo(converter, currentLocation, eval, clauses, - result, iv); + + int64_t numCollapse = collectLoopRelatedInfo(converter, currentLocation, eval, + clauses, loopResult, iv); + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + collapseResult.collapseNumLoops = firOpBuilder.getI64IntegerAttr(numCollapse); + return numCollapse > 1; } bool ClauseProcessor::processDevice(lower::StatementContext &stmtCtx, diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index 01ac15a1ffc71..324ea3c1047a5 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -63,7 +63,8 @@ class ClauseProcessor { mlir::omp::CancelDirectiveNameClauseOps &result) const; bool processCollapse(mlir::Location currentLocation, lower::pft::Evaluation &eval, - mlir::omp::LoopRelatedClauseOps &result, + mlir::omp::LoopRelatedClauseOps &loopResult, + mlir::omp::CollapseClauseOps &collapseResult, llvm::SmallVectorImpl &iv) const; bool processDevice(lower::StatementContext &stmtCtx, mlir::omp::DeviceClauseOps &result) const; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index d301ceec555e0..529b375005c92 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -503,7 +503,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_distribute: case OMPD_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); break; case OMPD_teams: @@ -522,7 +522,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_target_teams_distribute: 
case OMPD_target_teams_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); cp.processNumTeams(stmtCtx, hostInfo->ops); break; @@ -533,7 +533,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, cp.processNumTeams(stmtCtx, hostInfo->ops); [[fallthrough]]; case OMPD_loop: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); break; case OMPD_teams_workdistribute: @@ -1573,7 +1573,7 @@ genLoopNestClauses(lower::AbstractConverter &converter, HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter); if (!hostEvalInfo || !hostEvalInfo->apply(clauseOps, iv)) - cp.processCollapse(loc, eval, clauseOps, iv); + cp.processCollapse(loc, eval, clauseOps, clauseOps, iv); clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); From 6ed3cea30effab123d08bd8203ef5543edaf202d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 12 Sep 2025 01:13:57 +0200 Subject: [PATCH 51/64] Reduce diff size --- flang/lib/Lower/OpenMP/Utils.cpp | 117 ++++++++++++++++--------------- 1 file changed, 60 insertions(+), 57 deletions(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 035ffdaced2e7..6882d95ce3daf 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -599,12 +599,11 @@ static void convertLoopBounds(lower::AbstractConverter &converter, } } -/// Populates the sizes vector with values if the given OpenMPConstruct -/// Contains a loop construct with an inner tiling construct. -void collectTileSizesFromOpenMPConstruct( +// Helper function that finds the sizes clause in a inner OMPD_tile directive +// and passes the sizes clause to the callback function if found. 
+static void processTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &tileSizes, - Fortran::semantics::SemanticsContext &semaCtx) { + std::function processFun) { if (!ompCons) return; if (auto *ompLoop{std::get_if(&ompCons->u)}) { @@ -623,16 +622,14 @@ void collectTileSizesFromOpenMPConstruct( std::get(innerBegin.t).v; if (innerDirective == llvm::omp::Directive::OMPD_tile) { - // Get the size values from parse tree and convert to a vector + // Get the size values from parse tree and convert to a vector. const auto &innerClauseList{ std::get(innerBegin.t)}; for (const auto &clause : innerClauseList.v) { if (const auto tclause{ std::get_if(&clause.u)}) { - for (auto &tval : tclause->v) { - if (const auto v{EvaluateInt64(semaCtx, tval)}) - tileSizes.push_back(*v); - } + processFun(tclause); + break; } } } @@ -641,44 +638,17 @@ void collectTileSizesFromOpenMPConstruct( } /// Populates the sizes vector with values if the given OpenMPConstruct -/// Contains a loop construct with an inner tiling construct. -void collectPermutationFromOpenMPConstruct( +/// contains a loop construct with an inner tiling construct. 
+void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &permutation, + llvm::SmallVectorImpl &tileSizes, Fortran::semantics::SemanticsContext &semaCtx) { - if (!ompCons) - return; - - if (auto *ompLoop{std::get_if(&ompCons->u)}) { - const auto &nestedOptional = - std::get>(ompLoop->t); - assert(nestedOptional.has_value() && - "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = - std::get_if>( - &(nestedOptional.value())); - if (innerConstruct) { - const auto &innerLoopDirective = innerConstruct->value(); - const auto &innerBegin = - std::get(innerLoopDirective.t); - const auto &innerDirective = - std::get(innerBegin.t).v; - - if (innerDirective == llvm::omp::Directive::OMPD_interchange) { - // Get the size values from parse tree and convert to a vector - const auto &innerClauseList{ - std::get(innerBegin.t)}; - for (const auto &clause : innerClauseList.v) - if (const auto tclause{ - std::get_if(&clause.u)}) { - for (auto &tval : tclause->v) { - if (const auto v{EvaluateInt64(semaCtx, tval)}) - permutation.push_back(*v); - } - } - } - } - } + processTileSizesFromOpenMPConstruct( + ompCons, [&](const parser::OmpClause::Sizes *tclause) { + for (auto &tval : tclause->v) + if (const auto v{EvaluateInt64(semaCtx, tval)}) + tileSizes.push_back(*v); + }); } int64_t collectLoopRelatedInfo( @@ -706,6 +676,11 @@ int64_t collectLoopRelatedInfo( std::int64_t sizesLengthValue = 0l; std::int64_t permutationLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { + processTileSizesFromOpenMPConstruct( + ompCons, [&](const parser::OmpClause::Sizes *tclause) { + sizesLengthValue = tclause->v.size(); + }); + if (auto *ompLoop{std::get_if(&ompCons->u)}) { const auto &nestedOptional = std::get>(ompLoop->t); @@ -721,17 +696,6 @@ int64_t collectLoopRelatedInfo( const auto &innerDirective = std::get(innerBegin.t).v; - if (innerDirective == llvm::omp::Directive::OMPD_tile) { - // Get the size values from parse 
tree and convert to a vector - const auto &innerClauseList{ - std::get(innerBegin.t)}; - for (const auto &clause : innerClauseList.v) - if (const auto tclause{ - std::get_if(&clause.u)}) { - sizesLengthValue = tclause->v.size(); - } - } - if (innerDirective == llvm::omp::Directive::OMPD_interchange) { // Get the size values from parse tree and convert to a vector const auto &innerClauseList{ @@ -792,6 +756,45 @@ int64_t collectLoopRelatedInfo( return numCollapse; } +void collectPermutationFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &permutation, + Fortran::semantics::SemanticsContext &semaCtx) { + if (!ompCons) + return; + + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + const auto &nestedOptional = + std::get>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopDirective = innerConstruct->value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t).v; + + if (innerDirective == llvm::omp::Directive::OMPD_interchange) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{ + std::get(innerBegin.t)}; + for (const auto &clause : innerClauseList.v) + if (const auto tclause{ + std::get_if(&clause.u)}) { + for (auto &tval : tclause->v) { + if (const auto v{EvaluateInt64(semaCtx, tval)}) + permutation.push_back(*v); + } + } + } + } + } +} + } // namespace omp } // namespace lower } // namespace Fortran From 3660ee420684e2b108228bf8c5ffeb5f07a0c948 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 20 Sep 2025 03:37:56 +0200 Subject: [PATCH 52/64] Post-merge fixes --- flang/lib/Lower/OpenMP/OpenMP.cpp | 40 ++++++++-------------- flang/lib/Lower/OpenMP/Utils.cpp | 33 ++++++++---------- flang/lib/Parser/openmp-parsers.cpp | 4 +-- 
flang/lib/Parser/unparse.cpp | 3 -- flang/lib/Semantics/canonicalize-omp.cpp | 10 ++---- flang/lib/Semantics/resolve-directives.cpp | 11 +++--- 6 files changed, 39 insertions(+), 62 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index ebe49553811d1..2ded09f0c51fd 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3983,35 +3983,27 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, const parser::OpenMPLoopConstruct &loopConstruct) { const parser::OmpDirectiveSpecification &beginSpec = loopConstruct.BeginDir(); List clauses = makeClauses(beginSpec.Clauses(), semaCtx); - if (auto &endSpec = loopConstruct.EndDir()) - clauses.append(makeClauses(endSpec->Clauses(), semaCtx)); + if (auto &endSpec = loopConstruct.EndDir()) clauses.append(makeClauses(endSpec->Clauses(), semaCtx)); mlir::Location currentLocation = converter.genLocation(beginSpec.source); - llvm::omp::Directive directive = - parser::omp::GetOmpDirectiveName(beginLoopDirective).v; - const parser::CharBlock &source = - std::get(beginLoopDirective.t).source; +// llvm::omp::Directive directive = Fortran::parser::omp::GetOmpDirectiveName(loopConstruct).v; + // parser::omp::GetOmpDirectiveName(beginLoopDirective).v; + //const parser::CharBlock &source = std::get(beginLoopDirective.t).source; - const parser::OmpDirectiveName &beginName = beginSpec.DirName(); - ConstructQueue queue{ - buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, - eval, beginName.source, beginName.v, clauses)}; + const parser::OmpDirectiveName &beginName = beginSpec.DirName(); + ConstructQueue queue{ buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, eval, beginName.source, beginName.v, clauses)}; - auto &optLoopCons = - std::get>(loopConstruct.t); + auto &optLoopCons = std::get>(loopConstruct.t); if (optLoopCons.has_value()) { - if (auto *ompNestedLoopCons{ - std::get_if>( - 
&*optLoopCons)}) { - const Fortran::parser::OpenMPLoopConstruct &x = - ompNestedLoopCons->value(); - const Fortran::parser::OmpBeginLoopDirective &y = std::get<0>(x.t); + if (auto *ompNestedLoopCons{ std::get_if>( &*optLoopCons)}) { + const Fortran::parser::OpenMPLoopConstruct &x = ompNestedLoopCons->value(); + // const Fortran::parser::OmpBeginLoopDirective &y = std::get<0>(x.t); // const Fortran::parser::OmpClauseList &clauseList = std::get<1>(y.t); - llvm::omp::Directive nestedDirective = - parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; - List nestedClauses = - makeClauses(std::get(y.t), semaCtx); + llvm::omp::Directive nestedDirective = parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; + + List nestedClauses = makeClauses(x.BeginDir().Clauses(), semaCtx); + // makeClauses(std::get(y.t), semaCtx); switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: @@ -4019,9 +4011,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, // generating the omp.loop_nest op. 
break; case llvm::omp::Directive::OMPD_interchange: { - ConstructQueue nestedQueue{buildConstructQueue( - converter.getFirOpBuilder().getModule(), semaCtx, eval, source, - nestedDirective, nestedClauses)}; + ConstructQueue nestedQueue{buildConstructQueue( converter.getFirOpBuilder().getModule(), semaCtx, eval, beginName.source, nestedDirective, nestedClauses)}; for (auto nl : nestedQueue) { queue.push_back(nl); } diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index a5e9060734cdb..d4f3eefdeaf2e 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -677,24 +677,19 @@ int64_t collectLoopRelatedInfo( }); if (auto *ompLoop{std::get_if(&ompCons->u)}) { - const auto &nestedOptional = - std::get>(ompLoop->t); - assert(nestedOptional.has_value() && - "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = - std::get_if>( - &(nestedOptional.value())); + const auto &nestedOptional = std::get>(ompLoop->t); + assert(nestedOptional.has_value() && "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = std::get_if>( &(nestedOptional.value())); if (innerConstruct) { const auto &innerLoopDirective = innerConstruct->value(); - const auto &innerBegin = - std::get(innerLoopDirective.t); - const auto &innerDirective = - std::get(innerBegin.t).v; + const auto &innerBegin = std::get(innerLoopDirective.t); + const auto &innerDirective = Fortran::parser::omp::GetOmpDirectiveName(innerBegin).v; + //std::get(innerBegin.t).v; if (innerDirective == llvm::omp::Directive::OMPD_interchange) { // Get the size values from parse tree and convert to a vector - const auto &innerClauseList{ - std::get(innerBegin.t)}; + const auto &innerClauseList { innerBegin.Clauses() }; + // const auto &innerClauseList{ std::get(innerBegin.t)}; for (const auto &clause : innerClauseList.v) { if (const auto tclause{ std::get_if(&clause.u)}) { @@ -768,15 +763,15 @@ void 
collectPermutationFromOpenMPConstruct( &(nestedOptional.value())); if (innerConstruct) { const auto &innerLoopDirective = innerConstruct->value(); - const auto &innerBegin = - std::get(innerLoopDirective.t); - const auto &innerDirective = - std::get(innerBegin.t).v; + const auto &innerBegin = innerLoopDirective.BeginDir(); + //std::get(innerLoopDirective.t); + const auto &innerDirective = Fortran::parser::omp::GetOmpDirectiveName(innerLoopDirective).v; + //std::get(innerBegin.t).v; if (innerDirective == llvm::omp::Directive::OMPD_interchange) { // Get the size values from parse tree and convert to a vector - const auto &innerClauseList{ - std::get(innerBegin.t)}; + const auto &innerClauseList{ innerBegin.Clauses() }; + //std::get(innerBegin.t)}; for (const auto &clause : innerClauseList.v) if (const auto tclause{ std::get_if(&clause.u)}) { diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index b2ab9ce8551ac..cfe42cb34653f 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1366,7 +1366,6 @@ TYPE_PARSER(sourced(construct( TYPE_PARSER(sourced(construct( verbatim("METADIRECTIVE"_tok), Parser{}))) - "INTERCHANGE" >> pure(llvm::omp::Directive::OMPD_interchange), static inline constexpr auto IsDirective(llvm::omp::Directive dir) { return [dir](const OmpDirectiveName &name) -> bool { return dir == name.v; }; } @@ -1954,7 +1953,8 @@ static constexpr DirectiveSet GetLoopDirectives() { unsigned(Directive::OMPD_teams_distribute_simd), unsigned(Directive::OMPD_teams_loop), unsigned(Directive::OMPD_tile), - unsigned(Directive::OMPD_unroll), + unsigned(Directive::OMPD_unroll), + unsigned(Directive::OMPD_interchange), }; return loopDirectives; } diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 02dfc3bf4955d..73bbbc04f46b1 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2403,9 +2403,6 @@ class UnparseVisitor { } #define 
GEN_FLANG_CLAUSE_UNPARSE #include "llvm/Frontend/OpenMP/OMP.inc" - case llvm::omp::Directive::OMPD_interchange: - Word("INTERCHANGE "); - break; void Unparse(const OmpObjectList &x) { Walk(x.v, ","); } void Unparse(const common::OmpMemoryOrderType &x) { diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index bb5bbb3c3be8d..b34296271f79a 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -177,12 +177,8 @@ class CanonicalizationOfOmp { // OpenMP Loop Construct and the DO loop itself auto &nestedBeginDirective = ompLoopCons->BeginDir(); auto &nestedBeginName = nestedBeginDirective.DirName(); - if ((nestedBeginName.v == llvm::omp::Directive::OMPD_unroll || - nestedBeginName.v == llvm::omp::Directive::OMPD_tile) && - !(nestedBeginName.v == llvm::omp::Directive::OMPD_unroll && - beginName.v == llvm::omp::Directive::OMPD_tile|| - beginName.v == - llvm::omp::Directive::OMPD_interchange)) { + if ((nestedBeginName.v == llvm::omp::Directive::OMPD_unroll || nestedBeginName.v == llvm::omp::Directive::OMPD_tile || nestedBeginName.v == llvm::omp::Directive::OMPD_interchange) && + !(nestedBeginName.v == llvm::omp::Directive::OMPD_unroll &&beginName.v == llvm::omp::Directive::OMPD_tile)) { // iterate through the remaining block items to find the end directive // for the unroll/tile directive. 
parser::Block::iterator endIt; @@ -207,7 +203,7 @@ class CanonicalizationOfOmp { common::Indirection{std::move(*ompLoopCons)}}}; nextIt = block.erase(nextIt); } else if (nestedBeginName.v == llvm::omp::Directive::OMPD_unroll && - beginName.v == llvm::omp::Directive::OMPD_tile) { + beginName.v == llvm::omp::Directive::OMPD_tile ) { // if a loop has been unrolled, the user can not then tile that loop // as it has been unrolled const parser::OmpClauseList &unrollClauseList{ diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 2961fdf52709f..254e4a2f47b4d 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2126,12 +2126,11 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { - const auto &beginLoopDir{std::get(y.t)}; - auto &&yt = std::get<0>(beginLoopDir.t); - - const auto &beginDir{std::get(beginLoopDir.t)}; - const auto &dirClauses{std::get(beginLoopDir.t)}; - auto ytv = beginDir.v; + const auto &beginLoopDir{ y.BeginDir() }; + const auto &dirClauses{ beginLoopDir.Clauses()}; +// const auto &beginLoopDir{std::get(y.t)}; +// const auto &dirClauses{std::get(beginLoopDir.t)}; + auto ytv = Fortran::parser::omp::GetOmpDirectiveName (y).v; for (const auto &clause : x.v) { if (const auto oclause{ From 82ea715367651dc7670b5a727bcae4e72e677f23 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 20 Sep 2025 03:44:27 +0200 Subject: [PATCH 53/64] cleanup --- flang/lib/Lower/OpenMP/OpenMP.cpp | 36 +++++++++---------- flang/lib/Lower/OpenMP/Utils.cpp | 31 ++++++++-------- flang/lib/Parser/openmp-parsers.cpp | 2 +- flang/lib/Semantics/CMakeLists.txt | 42 +++++++++------------- flang/lib/Semantics/canonicalize-omp.cpp | 9 +++-- flang/lib/Semantics/resolve-directives.cpp | 10 +++--- 6 files changed, 63 insertions(+), 67 deletions(-) 
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2ded09f0c51fd..de2dea9bbecbe 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3983,27 +3983,28 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, const parser::OpenMPLoopConstruct &loopConstruct) { const parser::OmpDirectiveSpecification &beginSpec = loopConstruct.BeginDir(); List clauses = makeClauses(beginSpec.Clauses(), semaCtx); - if (auto &endSpec = loopConstruct.EndDir()) clauses.append(makeClauses(endSpec->Clauses(), semaCtx)); + if (auto &endSpec = loopConstruct.EndDir()) + clauses.append(makeClauses(endSpec->Clauses(), semaCtx)); mlir::Location currentLocation = converter.genLocation(beginSpec.source); -// llvm::omp::Directive directive = Fortran::parser::omp::GetOmpDirectiveName(loopConstruct).v; - // parser::omp::GetOmpDirectiveName(beginLoopDirective).v; - //const parser::CharBlock &source = std::get(beginLoopDirective.t).source; - const parser::OmpDirectiveName &beginName = beginSpec.DirName(); - ConstructQueue queue{ buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, eval, beginName.source, beginName.v, clauses)}; + ConstructQueue queue{ + buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, + eval, beginName.source, beginName.v, clauses)}; - auto &optLoopCons = std::get>(loopConstruct.t); + auto &optLoopCons = + std::get>(loopConstruct.t); if (optLoopCons.has_value()) { - if (auto *ompNestedLoopCons{ std::get_if>( &*optLoopCons)}) { - const Fortran::parser::OpenMPLoopConstruct &x = ompNestedLoopCons->value(); - // const Fortran::parser::OmpBeginLoopDirective &y = std::get<0>(x.t); - // const Fortran::parser::OmpClauseList &clauseList = std::get<1>(y.t); - llvm::omp::Directive nestedDirective = parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; - + if (auto *ompNestedLoopCons{ + std::get_if>( + &*optLoopCons)}) { + const 
Fortran::parser::OpenMPLoopConstruct &x = + ompNestedLoopCons->value(); + llvm::omp::Directive nestedDirective = + parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; + List nestedClauses = makeClauses(x.BeginDir().Clauses(), semaCtx); - // makeClauses(std::get(y.t), semaCtx); switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: @@ -4011,7 +4012,9 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, // generating the omp.loop_nest op. break; case llvm::omp::Directive::OMPD_interchange: { - ConstructQueue nestedQueue{buildConstructQueue( converter.getFirOpBuilder().getModule(), semaCtx, eval, beginName.source, nestedDirective, nestedClauses)}; + ConstructQueue nestedQueue{buildConstructQueue( + converter.getFirOpBuilder().getModule(), semaCtx, eval, + beginName.source, nestedDirective, nestedClauses)}; for (auto nl : nestedQueue) { queue.push_back(nl); } @@ -4026,9 +4029,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, } } } - - - genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, queue.begin()); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index d4f3eefdeaf2e..e4d2aeef6ba59 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -677,19 +677,23 @@ int64_t collectLoopRelatedInfo( }); if (auto *ompLoop{std::get_if(&ompCons->u)}) { - const auto &nestedOptional = std::get>(ompLoop->t); - assert(nestedOptional.has_value() && "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = std::get_if>( &(nestedOptional.value())); + const auto &nestedOptional = + std::get>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); if (innerConstruct) { const auto &innerLoopDirective = innerConstruct->value(); - const auto &innerBegin = 
std::get(innerLoopDirective.t); - const auto &innerDirective = Fortran::parser::omp::GetOmpDirectiveName(innerBegin).v; - //std::get(innerBegin.t).v; + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + Fortran::parser::omp::GetOmpDirectiveName(innerBegin).v; if (innerDirective == llvm::omp::Directive::OMPD_interchange) { // Get the size values from parse tree and convert to a vector - const auto &innerClauseList { innerBegin.Clauses() }; - // const auto &innerClauseList{ std::get(innerBegin.t)}; + const auto &innerClauseList{innerBegin.Clauses()}; for (const auto &clause : innerClauseList.v) { if (const auto tclause{ std::get_if(&clause.u)}) { @@ -763,15 +767,14 @@ void collectPermutationFromOpenMPConstruct( &(nestedOptional.value())); if (innerConstruct) { const auto &innerLoopDirective = innerConstruct->value(); - const auto &innerBegin = innerLoopDirective.BeginDir(); - //std::get(innerLoopDirective.t); - const auto &innerDirective = Fortran::parser::omp::GetOmpDirectiveName(innerLoopDirective).v; - //std::get(innerBegin.t).v; + const auto &innerBegin = innerLoopDirective.BeginDir(); + const auto &innerDirective = + Fortran::parser::omp::GetOmpDirectiveName(innerLoopDirective).v; if (innerDirective == llvm::omp::Directive::OMPD_interchange) { // Get the size values from parse tree and convert to a vector - const auto &innerClauseList{ innerBegin.Clauses() }; - //std::get(innerBegin.t)}; + const auto &innerClauseList{innerBegin.Clauses()}; + // std::get(innerBegin.t)}; for (const auto &clause : innerClauseList.v) if (const auto tclause{ std::get_if(&clause.u)}) { diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index cfe42cb34653f..ca45bb0386ef2 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1953,7 +1953,7 @@ static constexpr DirectiveSet GetLoopDirectives() { unsigned(Directive::OMPD_teams_distribute_simd), 
unsigned(Directive::OMPD_teams_loop), unsigned(Directive::OMPD_tile), - unsigned(Directive::OMPD_unroll), + unsigned(Directive::OMPD_unroll), unsigned(Directive::OMPD_interchange), }; return loopDirectives; diff --git a/flang/lib/Semantics/CMakeLists.txt b/flang/lib/Semantics/CMakeLists.txt index 414b59812aa72..109bc2dbb8569 100644 --- a/flang/lib/Semantics/CMakeLists.txt +++ b/flang/lib/Semantics/CMakeLists.txt @@ -1,4 +1,10 @@ -add_flang_library(FortranSemanticsChecks PARTIAL_SOURCES_INTENDED +add_flang_library(FortranSemantics + assignment.cpp + attr.cpp + canonicalize-acc.cpp + canonicalize-directives.cpp + canonicalize-do.cpp + canonicalize-omp.cpp check-acc-structure.cpp check-allocate.cpp check-arithmeticif.cpp @@ -23,30 +29,6 @@ add_flang_library(FortranSemanticsChecks PARTIAL_SOURCES_INTENDED check-select-rank.cpp check-select-type.cpp check-stop.cpp - - DEPENDS - acc_gen - omp_gen - - LINK_LIBS - FortranSupport - FortranParser - FortranEvaluate - - LINK_COMPONENTS - Support - FrontendOpenMP - FrontendOpenACC - TargetParser -) - -add_flang_library(FortranSemantics PARTIAL_SOURCES_INTENDED - assignment.cpp - attr.cpp - canonicalize-acc.cpp - canonicalize-directives.cpp - canonicalize-do.cpp - canonicalize-omp.cpp compute-offsets.cpp data-to-inits.cpp definable.cpp @@ -79,7 +61,6 @@ add_flang_library(FortranSemantics PARTIAL_SOURCES_INTENDED FortranSupport FortranParser FortranEvaluate - FortranSemanticsChecks LINK_COMPONENTS Support @@ -87,3 +68,12 @@ add_flang_library(FortranSemantics PARTIAL_SOURCES_INTENDED FrontendOpenACC TargetParser ) + +target_precompile_headers(FortranSemantics PRIVATE + [["flang/Semantics/semantics.h"]] + [["flang/Semantics/type.h"]] + [["flang/Semantics/openmp-modifiers.h"]] + [["flang/Semantics/expression.h"]] + [["flang/Semantics/tools.h"]] + [["flang/Semantics/symbol.h"]] +) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index b34296271f79a..087a8c53f2afc 100644 --- 
a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -177,8 +177,11 @@ class CanonicalizationOfOmp { // OpenMP Loop Construct and the DO loop itself auto &nestedBeginDirective = ompLoopCons->BeginDir(); auto &nestedBeginName = nestedBeginDirective.DirName(); - if ((nestedBeginName.v == llvm::omp::Directive::OMPD_unroll || nestedBeginName.v == llvm::omp::Directive::OMPD_tile || nestedBeginName.v == llvm::omp::Directive::OMPD_interchange) && - !(nestedBeginName.v == llvm::omp::Directive::OMPD_unroll &&beginName.v == llvm::omp::Directive::OMPD_tile)) { + if ((nestedBeginName.v == llvm::omp::Directive::OMPD_unroll || + nestedBeginName.v == llvm::omp::Directive::OMPD_tile || + nestedBeginName.v == llvm::omp::Directive::OMPD_interchange) && + !(nestedBeginName.v == llvm::omp::Directive::OMPD_unroll && + beginName.v == llvm::omp::Directive::OMPD_tile)) { // iterate through the remaining block items to find the end directive // for the unroll/tile directive. 
parser::Block::iterator endIt; @@ -203,7 +206,7 @@ class CanonicalizationOfOmp { common::Indirection{std::move(*ompLoopCons)}}}; nextIt = block.erase(nextIt); } else if (nestedBeginName.v == llvm::omp::Directive::OMPD_unroll && - beginName.v == llvm::omp::Directive::OMPD_tile ) { + beginName.v == llvm::omp::Directive::OMPD_tile) { // if a loop has been unrolled, the user can not then tile that loop // as it has been unrolled const parser::OmpClauseList &unrollClauseList{ diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 254e4a2f47b4d..5a29f3245b4db 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2126,11 +2126,11 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { - const auto &beginLoopDir{ y.BeginDir() }; - const auto &dirClauses{ beginLoopDir.Clauses()}; -// const auto &beginLoopDir{std::get(y.t)}; -// const auto &dirClauses{std::get(beginLoopDir.t)}; - auto ytv = Fortran::parser::omp::GetOmpDirectiveName (y).v; + const auto &beginLoopDir{y.BeginDir()}; + const auto &dirClauses{beginLoopDir.Clauses()}; + // const auto &beginLoopDir{std::get(y.t)}; + // const auto &dirClauses{std::get(beginLoopDir.t)}; + auto ytv = Fortran::parser::omp::GetOmpDirectiveName(y).v; for (const auto &clause : x.v) { if (const auto oclause{ From 6a8b6cc372b8469c558ba3f81dd9017cb97224c3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 20 Sep 2025 15:14:21 +0200 Subject: [PATCH 54/64] cleanup --- flang/lib/Semantics/resolve-directives.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 5a29f3245b4db..f9dde64c4038e 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2128,8 
+2128,6 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( llvm::SmallVector &clauses) { const auto &beginLoopDir{y.BeginDir()}; const auto &dirClauses{beginLoopDir.Clauses()}; - // const auto &beginLoopDir{std::get(y.t)}; - // const auto &dirClauses{std::get(beginLoopDir.t)}; auto ytv = Fortran::parser::omp::GetOmpDirectiveName(y).v; for (const auto &clause : x.v) { From eb98bbe2f4f4f1628ff76d405e26a3c578e00125 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Oct 2025 00:21:26 +0200 Subject: [PATCH 55/64] [OpenMP][test] .f90 -> .F90 The test makes use of the preprocessor, which requires a .F90 suffix --- openmp/runtime/test/transform/tile/{intfor.f90 => intfor.F90} | 1 + 1 file changed, 1 insertion(+) rename openmp/runtime/test/transform/tile/{intfor.f90 => intfor.F90} (98%) diff --git a/openmp/runtime/test/transform/tile/intfor.f90 b/openmp/runtime/test/transform/tile/intfor.F90 similarity index 98% rename from openmp/runtime/test/transform/tile/intfor.f90 rename to openmp/runtime/test/transform/tile/intfor.F90 index dac0de6a99021..4ca9f14fdae9f 100644 --- a/openmp/runtime/test/transform/tile/intfor.f90 +++ b/openmp/runtime/test/transform/tile/intfor.F90 @@ -10,6 +10,7 @@ ! 
RUN: %t-ub18.exe | FileCheck %s --match-full-lines program tile_intfor_1d + implicit none integer i print *, 'do' From 0d7030f641c2de155fe9736006735a3de448b885 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Oct 2025 18:39:29 +0200 Subject: [PATCH 56/64] post-merge fix --- flang/lib/Lower/OpenMP/Utils.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 73cf26431233a..10e5114bcb880 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -726,12 +726,7 @@ void collectLoopRelatedInfo( } } - int64_t collapseValue = collapseValue - sizesLengthValue; - if (sizesLengthValue > collapseValue) - collapseValue = sizesLengthValue; - if (permutationLengthValue > collapseValue) - collapseValue = permutationLengthValue; - +std::int64_t collapseValue = std::max({numCollapse, sizesLengthValue, permutationLengthValue}); std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = From 947f513be5d4c9d45393ba4abcecfe253eee0c1f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Oct 2025 19:19:12 +0200 Subject: [PATCH 57/64] Don't XFAIL intdo --- openmp/runtime/test/transform/tile/intdo.f90 | 1 - 1 file changed, 1 deletion(-) diff --git a/openmp/runtime/test/transform/tile/intdo.f90 b/openmp/runtime/test/transform/tile/intdo.f90 index 40e4c2c53f89c..27cd383a69fea 100644 --- a/openmp/runtime/test/transform/tile/intdo.f90 +++ b/openmp/runtime/test/transform/tile/intdo.f90 @@ -1,5 +1,4 @@ ! This test checks lowering of OpenMP tile directive -! XFAIL: * ! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe ! 
RUN: %t.exe | FileCheck %s --match-full-lines From df93d0e7542e255ff25d487e277094e00bddfc52 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Oct 2025 19:31:53 +0200 Subject: [PATCH 58/64] clang-format --- flang/lib/Lower/OpenMP/Utils.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 80999d842fe71..69ee1efd648d2 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -726,7 +726,8 @@ void collectLoopRelatedInfo( } } - std::int64_t collapseValue = std::max({numCollapse, sizesLengthValue,permutationLengthValue}); + std::int64_t collapseValue = + std::max({numCollapse, sizesLengthValue, permutationLengthValue}); std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = From dc8066a38d65c4ab41d0c213fa26a99f06b04ee9 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 27 Jan 2026 17:22:53 +0100 Subject: [PATCH 59/64] post-merge fixes --- flang/lib/Lower/OpenMP/Utils.cpp | 47 +++++++++++++------- flang/lib/Semantics/check-omp-loop.cpp | 37 ++++++++------- openmp/runtime/test/transform/tile/intdo.f90 | 1 - 3 files changed, 50 insertions(+), 35 deletions(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 621ea99706a3f..73b44b27ead82 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -827,23 +827,36 @@ static bool processInterchangePermutationFromOpenMPConstruct( pft::Evaluation *getNestedDoConstruct(pft::Evaluation &eval) { - for (pft::Evaluation &nested : eval.getNestedEvaluations()) { - // In an OpenMPConstruct there can be compiler directives: - // 1 <> - // 2 CompilerDirective: !unroll - // <> -> 8 - if (nested.getIf()) - continue; - // Within a DoConstruct, there can be compiler directives, plus - // there is a DoStmt before the body: - // <> -> 8 - // 3 NonLabelDoStmt -> 7: do i = 1, n - // <> -> 7 - if (nested.getIf()) - continue; - 
assert(nested.getIf() && - "Unexpected construct in the nested evaluations"); - return &nested; + pft::Evaluation *curEval = &eval; + while (true) { + for (pft::Evaluation &nested : curEval->getNestedEvaluations()) { + // In an OpenMPConstruct there can be compiler directives: + // 1 <> + // 2 CompilerDirective: !unroll + // <> -> 8 + if (nested.getIf()) + continue; + // Within a DoConstruct, there can be compiler directives, plus + // there is a DoStmt before the body: + // <> -> 8 + // 3 NonLabelDoStmt -> 7: do i = 1, n + // <> -> 7 + if (nested.getIf()) + continue; + + if (nested.getIf()) + return &nested; + + // Follow innermost loop construct + if (auto &&ompCons = nested.getIf()) { + auto &&u = ompCons->u; + auto &&name = parser::omp::GetOmpDirectiveName(u); + curEval = &nested; + break; + } + + llvm_unreachable("Expected do loop to be in the nested evaluations"); + } } llvm_unreachable("Expected do loop to be in the nested evaluations"); } diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp index 726dbe865834d..b98ab1f13bdc4 100644 --- a/flang/lib/Semantics/check-omp-loop.cpp +++ b/flang/lib/Semantics/check-omp-loop.cpp @@ -178,6 +178,24 @@ void OmpStructureChecker::HasInvalidLoopBinding( } } + +static bool IsLoopTransforming(llvm::omp::Directive dir) { + switch (dir) { + // TODO case llvm::omp::Directive::OMPD_flatten: + case llvm::omp::Directive::OMPD_fuse: + case llvm::omp::Directive::OMPD_interchange: + case llvm::omp::Directive::OMPD_nothing: + case llvm::omp::Directive::OMPD_reverse: + // TODO case llvm::omp::Directive::OMPD_split: + case llvm::omp::Directive::OMPD_stripe: + case llvm::omp::Directive::OMPD_tile: + case llvm::omp::Directive::OMPD_unroll: + return true; + default: + return false; + } +} + void OmpStructureChecker::CheckSIMDNest(const parser::OpenMPConstruct &c) { // Check the following: // The only OpenMP constructs that can be encountered during execution of @@ -225,7 +243,7 @@ void 
OmpStructureChecker::CheckSIMDNest(const parser::OpenMPConstruct &c) { const auto &beginName{c.BeginDir().DirName()}; if (beginName.v == llvm::omp::Directive::OMPD_simd || beginName.v == llvm::omp::Directive::OMPD_do_simd || - beginName.v == llvm::omp::Directive::OMPD_loop) { + beginName.v == llvm::omp::Directive::OMPD_loop || IsLoopTransforming(beginName.v)) { eligibleSIMD = true; } }, @@ -245,22 +263,7 @@ void OmpStructureChecker::CheckSIMDNest(const parser::OpenMPConstruct &c) { } } -static bool IsLoopTransforming(llvm::omp::Directive dir) { - switch (dir) { - // TODO case llvm::omp::Directive::OMPD_flatten: - case llvm::omp::Directive::OMPD_fuse: - case llvm::omp::Directive::OMPD_interchange: - case llvm::omp::Directive::OMPD_nothing: - case llvm::omp::Directive::OMPD_reverse: - // TODO case llvm::omp::Directive::OMPD_split: - case llvm::omp::Directive::OMPD_stripe: - case llvm::omp::Directive::OMPD_tile: - case llvm::omp::Directive::OMPD_unroll: - return true; - default: - return false; - } -} + void OmpStructureChecker::CheckNestedBlock(const parser::OpenMPLoopConstruct &x, const parser::Block &body, size_t &nestedCount) { diff --git a/openmp/runtime/test/transform/tile/intdo.f90 b/openmp/runtime/test/transform/tile/intdo.f90 index 40e4c2c53f89c..27cd383a69fea 100644 --- a/openmp/runtime/test/transform/tile/intdo.f90 +++ b/openmp/runtime/test/transform/tile/intdo.f90 @@ -1,5 +1,4 @@ ! This test checks lowering of OpenMP tile directive -! XFAIL: * ! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe ! 
RUN: %t.exe | FileCheck %s --match-full-lines From eee41957175dc9cbe5c3702ccd2d81925a85815d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 27 Jan 2026 18:53:56 +0100 Subject: [PATCH 60/64] clang-format --- flang/lib/Lower/OpenMP/Utils.cpp | 4 +- flang/lib/Semantics/check-omp-loop.cpp | 6 +- log.log | 12746 +++++++++++++++++++++++ 3 files changed, 12749 insertions(+), 7 deletions(-) create mode 100644 log.log diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 73b44b27ead82..13bde5cfa645f 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -822,9 +822,7 @@ static bool processInterchangePermutationFromOpenMPConstruct( } } return false; -} - - +} pft::Evaluation *getNestedDoConstruct(pft::Evaluation &eval) { pft::Evaluation *curEval = &eval; diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp index ba7e555214022..590bbee833cd5 100644 --- a/flang/lib/Semantics/check-omp-loop.cpp +++ b/flang/lib/Semantics/check-omp-loop.cpp @@ -186,7 +186,6 @@ void OmpStructureChecker::HasInvalidLoopBinding( } } - static bool IsLoopTransforming(llvm::omp::Directive dir) { switch (dir) { // TODO case llvm::omp::Directive::OMPD_flatten: @@ -251,7 +250,8 @@ void OmpStructureChecker::CheckSIMDNest(const parser::OpenMPConstruct &c) { const auto &beginName{c.BeginDir().DirName()}; if (beginName.v == llvm::omp::Directive::OMPD_simd || beginName.v == llvm::omp::Directive::OMPD_do_simd || - beginName.v == llvm::omp::Directive::OMPD_loop || IsLoopTransforming(beginName.v)) { + beginName.v == llvm::omp::Directive::OMPD_loop || + IsLoopTransforming(beginName.v)) { eligibleSIMD = true; } }, @@ -271,8 +271,6 @@ void OmpStructureChecker::CheckSIMDNest(const parser::OpenMPConstruct &c) { } } - - void OmpStructureChecker::CheckNestedBlock( const parser::OpenMPLoopConstruct &x, const parser::Block &body) { for (auto &stmt : body) { diff --git a/log.log b/log.log new file mode 100644 
index 0000000000000..7d8ffe7afc11f --- /dev/null +++ b/log.log @@ -0,0 +1,12746 @@ +commit 9be7c1037f26146e469c85061d6685a9172c5de9 +Author: Tom Eccles +Date: Mon Jan 26 14:06:46 2026 +0000 + + [flang][Lower] Fix UB in location handling (#177944) + + Previously `prov` received the address of a variable allocated in stack + memory (the contents of `include`). `prov` would then access that memory + outside of the lifetime of that stack allocation: leading to UB. + + This only manifested on thinLTO builds. No added test because + flang/test/Lower/location.f90 covers it (when thinLTO is enabled) and + there are bots guarding the thin-lto configuration. + + Fixes #156629 + Fixes #176404 + +commit 2f1b1f3a543f79774aa5463c956404d330143a06 +Author: Chi-Chun, Chen +Date: Sat Jan 24 13:19:13 2026 -0600 + + [flang][mlir][OpenMP] Support inbranch and notinbranch clause (#177310) + + Support inbranch and notinbranch clause for OpenMP declare simd + directive. + +commit 1036d782ae83b37578d6a810544a30f7453b79e9 +Author: Andre Kuhlenschmidt +Date: Thu Jan 22 08:58:10 2026 -0800 + + [flang][lowering] lowering assigned goto of allocatable variable (#175874) + + towards [#173594](https://github.com/llvm/llvm-project/issues/173594) + + This PR adds a lowering for `AssignedGoto`s that reference an + allocatable variable. + +commit 7a74e7fba33c8e016c79f0b4fa55ed908061019f +Author: Abid Qadeer +Date: Wed Jan 21 13:40:01 2026 +0000 + + [flang][OpenMP] Fix mapping of constant arrays. (#176763) + + The compiler skips mapping of named constants (parameters) to OpenMP + target regions under the assumption that constants don't need to be + mapped. This assumption is not valid when array is accessed inside with + dynamic index. 
The problem can be seen with the following code: + + ``` + module fir_lowering_check + implicit none + + integer, parameter :: dp = selected_real_kind(15, 307) + real(dp), parameter :: arrays(2) = (/ 0.0, 0.0 /) + + contains + + subroutine test(hold) + + integer, intent(in) :: hold + integer :: z + real(dp) :: temp + + !$omp target teams distribute parallel do + do z = 1, 2 + temp = arrays(hold) + end do + !$omp end target teams distribute parallel do + + end subroutine test + end module fir_lowering_check + + program main + use fir_lowering_check + + implicit none + integer :: hold + hold = 1 + call test(hold) + print *, "Finished" + + end program main + ``` + + It fails with the following error + `'hlfir.designate' op using value defined outside the region` + + The fix is to allow mapping of constant arrays and map them as `to`. + +commit d13119f26999533a3048db3db058e19ae02a76aa +Author: Chi-Chun, Chen +Date: Tue Jan 20 11:08:01 2026 -0600 + + [flang][mlir][OpenMP] Add support for uniform clause in declare simd (#176046) + + Define OpenMP uniform clause in mlir and emit it from flang. + +commit d542fac6b16406ec0ed0e168e6c3f4c6be28cff8 +Author: Krzysztof Parzyszek +Date: Tue Jan 20 09:57:35 2026 -0600 + + [flang] Add traits to more AST nodes (#175578) + + Follow-up to PR175211. + + There are still a few AST nodes that don't have any of the standard + traits (Wrapper/Tuple/etc). Because of that they require special + handling in the parse tree visitor. + + Convert a subset of these nodes to the typical format, and remove the + special cases from the parse tree visitor. + + The members of these nodes were frequently used, so instead of + extracting them by hand each time use helper member functions to access + them. + +commit b8fec8ebc632af1627c2c1d88b8c8b8957323a03 +Author: Krzysztof Parzyszek +Date: Tue Jan 20 08:00:00 2026 -0600 + + [flang] Add traits to more AST nodes (#175566) + + Follow-up to PR175211. 
+ + There are still a few AST nodes that don't have any of the standard + traits (Wrapper/Tuple/etc). Because of that they require special + handling in the parse tree visitor. + + Convert a subset of these nodes to the typical format, and remove the + special cases from the parse tree visitor. + +commit 1c6d2add766288e2d0d2b089a2b66f7b0f285141 +Author: Chi-Chun, Chen +Date: Fri Jan 16 10:51:27 2026 -0600 + + [OpenMP][Flang][MLIR] Introduce omp.declare_simd op and emit from Flang (#175604) + + Changes: + - Adds a new `omp.declare_simd` operation to the OpenMP MLIR dialect + - Lowers Fortran `!$omp declare simd` into `omp.declare_simd` inside the + enclosing function body + + mlir to LLVMIR translation and uniform clause will be added in follow-up + PRs. + +commit b86c84ce6381bc1037f3893b063aa019af0fb8eb +Author: Kelvin Li +Date: Fri Jan 16 10:12:02 2026 -0500 + + [flang] Handle unused variable (NFC) (#176274) + +commit 1d4f9ac37c043198d823e85e3cd777dc970d8b75 +Author: Jean-Didier PAILLEUX +Date: Thu Jan 15 18:02:07 2026 +0100 + + [flang] Fix crash with coarray teams #171048 (#172259) + + This PR updates the `CHANGE TEAM` construct to fix the bug mentioned in + the issue #171048. + When a construct such as `IfConstruct` was present in the `CHANGE TEAM` + region, several BB were created but outside the region. + +commit ccbe36f16d88fc6608efb381ecfc8904a1f55437 +Author: khaki3 <47756807+khaki3@users.noreply.github.com> +Date: Mon Jan 12 09:45:47 2026 -0800 + + [flang][acc] Implement cache directive lowering (#174897) + + The `acc.cache` operation is currently defined to be associated with a + loop. However, this implementation generates `acc.cache` as a standalone + data entry operation inside the loop body. The `acc.cache` operation + definition should be updated in a future change to reflect this usage. 
+ + Key implementation details: + - Add semantic checks to validate cache-specific constraints: at least + one bound must be specified for array sections, and only unit stride + is supported + - Use the shared `gatherDataOperandAddrAndBounds` infrastructure to + generate `acc.bounds` for cache operands, handling single elements + (`arr(i)`), full ranges (`arr(l:u)`), and partial ranges with missing + bounds (`arr(l:)` or `arr(:u)`) + - Set the data clause to `acc_cache` with the `readonly` modifier via + the `modifiers` attribute when the `readonly` clause is present + - Update the symbol map so subsequent lowering uses the cache result + - Insert cache operations after loop iterator setup + - Add symbol scope management for constructs inside `acc.loop` + +commit 6bfa042a10a04379261e35a710caadb1c53457c5 +Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> +Date: Fri Jan 9 06:51:29 2026 -0800 + + [flang][mlir] Add checks and test for linear clause on omp.wsloop and omp.simd (#174916) + + This PR adds additional checks and tests for linear clause on omp.wsloop + and omp.simd (both standalone and composite). For composite simd + constructs, the translation to LLVMIR uses the same + `LinearClauseProcessor` under `convertOmpSimd`, as already present in + previous PRs like https://github.com/llvm/llvm-project/pull/150386 and + https://github.com/llvm/llvm-project/pull/139386 + +commit 568b8e4873b2d04be7ab302246c8e3986ea61176 +Author: Krzysztof Parzyszek +Date: Thu Jan 8 18:26:48 2026 -0600 + + [flang] Add traits to several AST nodes (#175065) + + There are quite a few AST nodes that don't have any of the standard + traits (Wrapper/Tuple/etc). Because of that they require special + handling in the parse tree visitor. + + Convert a subset of these nodes to the typical format, and remove the + special cases from the parse tree visitor. 
+ +commit 3f5d91bfbc17a487fc14ac2c7f2d866fb97e3906 +Author: Chi-Chun, Chen +Date: Tue Jan 6 11:10:03 2026 -0600 + + [Flang][OpenMP] Implement device clause lowering for target directive (#173509) + + Add lowering support for the OpenMP `device` clause on the `target` + directive in Flang. + + The device expression is propagated through MLIR OpenMP and passed to + the host-side `__tgt_target_kernel` call. + +commit 316a9c52f022024978775c9af40ba829d0564888 +Author: Abid Qadeer +Date: Mon Jan 5 14:46:49 2026 +0000 + + [flang] Ignore ambiguous use statement in use_stmt generation. (#174387) + + The https://github.com/llvm/llvm-project/pull/168106 caused build + failures in testcases which have ambiguous use statements. This PR fixes + that by properly ignoring them in `emitUseStatementsFromFunit`. + +commit 212527c00ba60aa5677a1b1acdd0f15b32b8fd01 +Author: Thirumalai Shaktivel <74826228+Thirumalai-Shaktivel@users.noreply.github.com> +Date: Mon Jan 5 13:24:10 2026 +0530 + + [Flang] Add FIR and LLVM lowering support for prefetch directive (#167272) + + Implementation details: + * Add PrefetchOp in FirOps + * Handle PrefetchOp in FIR Lowering and also pass required default + values + * Handle PrefetchOp in CodeGen.cpp + * Add required tests + +commit 11d9694b757b2e2c9f5169967fcc85f25f9a5645 +Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> +Date: Sat Jan 3 21:37:43 2026 -0800 + + [flang][mlir] Add support for implicit linearization in omp.simd (#150386) + + Up till OpenMP version 4.5, the loop iteration variable in the + associated do-construct of simd is linear with a linear step equal to + the increment of the loop. This PR implements this functionality. For + versions > 4.5, such an implicit linear clause is not assumed for the + loop iteration variable. 
+ + Fixes https://github.com/llvm/llvm-project/issues/171006 + +commit c4387734322b9c050861454f799438346807c7b0 +Author: Victor Chernyakin +Date: Fri Jan 2 21:42:56 2026 -0700 + + [LLVM][ADT] Migrate users of `make_scope_exit` to CTAD (#174030) + + This is a followup to #173131, which introduced the CTAD functionality. + +commit fc9e6e13fd4bb7365e4b9659c08c3440688217ce +Author: Abid Qadeer +Date: Fri Jan 2 12:10:18 2026 +0000 + + [flang] Represent use statement in fir. (#168106) + + We have a longstanding issue in debug info that use statement is not + fully respected. The problem has been described in + https://github.com/llvm/llvm-project/issues/160923. This is first part + of the effort to address this issue. This PR adds infrastructure to emit + `use` statement information in FIR, which will be used by subsequent + patches to generate DWARF debug information. + + The information about use statement is collected during semantic + analysis and stored in `PreservedUseStmt` objects. During lowering, + `fir.use_stmt` operations are emitted for each `PreservedUseStmt` + object. The `fir.use_stmt` operation captures the module name, `only` + list symbols, and any renames specified in the use statement. The + `fir.use_stmt` is removed during `CodeGen`. + +commit 755f298ddcd43045be6eec1a9f15dc7ba820eecd +Author: Krzysztof Parzyszek +Date: Thu Dec 18 08:04:28 2025 -0600 + + [flang][OpenMP] Implement COMBINER clause (#172036) + + This adds parsing and lowering of the COMBINER clause. It utilizes the + existing lowering code for combiner-expression to lower the COMBINER + clause as well. + +commit 1451f3d9b008c76d66c215e5fb4ec3dde0f0d6ca +Author: Krzysztof Parzyszek +Date: Fri Dec 12 08:09:25 2025 -0600 + + [flang][OpenMP] Use StylizedInstance in converted clauses (#171907) + + Invent `StylizedInstance` class to store special variables together with + the instantiated expression in omp::clause::Initializer. 
This will + eliminate the need for visiting the original AST nodes in lowering to + MLIR. + +commit 3a0c006054f79dbb2c921003898b7242a37a38cb +Author: Tom Eccles +Date: Wed Dec 10 16:51:17 2025 +0000 + + Revert "[flang][OpenMP] Fix firstprivate not working with lastprivate in DO SIMD" (#171646) + + Reverts llvm/llvm-project#170163 + + Regression in fujitsu test suite + +commit 748e7af8dd6e9b4683a6402a0ca6598fe23a9c1e +Author: Krish Gupta +Date: Tue Dec 9 20:40:21 2025 +0530 + + [flang][OpenMP] Fix firstprivate not working with lastprivate in DO SIMD (#170163) + + This fixes a bug where firstprivate was ignored when the same variable + had both firstprivate and lastprivate clauses in a do simd construct. + + What was broken: + ``` + integer :: a + a = 10 + !$omp do simd firstprivate(a) lastprivate(a) + do i = 1, 1 + print *, a ! Should print 10, but printed garbage/0 + a = 20 + end do + !$omp end do simd + print *, a ! Correctly prints 20 + ``` + + Inside the loop, [a] wasn't being initialized from the firstprivate + clause—it just had whatever uninitialized value was there. + + The fix: + + In genCompositeDoSimd(), we were using simdItemDSP to handle + privatization for the whole loop nest. This only looked at SIMD clauses + and missed the firstprivate from the DO part. Changed it to use + wsloopItemDSP instead, which handles both DO clauses (firstprivate, + lastprivate) correctly. + + One line change in OpenMP.cpp + + Tests added: + + Lowering test to check MLIR generation + Runtime test to verify the actual values are correct + image + + + Fixes #168306 + + --------- + + Co-authored-by: Krish Gupta + +commit b360a782ca5da938d5e4f7c791508932a0bfc328 +Author: Akash Banerjee +Date: Fri Dec 5 17:38:41 2025 +0000 + + Reland "[Flang][OpenMP] Add lowering support for is_device_ptr clause (#169331)" (#170851) + + Add support for OpenMP is_device_ptr clause for target directives. 
+ + [MLIR][OpenMP] Add OpenMPToLLVMIRTranslation support for is_device_ptr + #169367 This PR adds support for the OpenMP is_device_ptr clause in the + MLIR to LLVM IR translation for target regions. The is_device_ptr clause + allows device pointers (allocated via OpenMP runtime APIs) to be used + directly in target regions without implicit mapping. + +commit 290b32a699aefbd1f18fe78351655dd42ce98f1e +Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> +Date: Thu Dec 4 20:39:17 2025 -0800 + + [llvm][mlir][OpenMP] Support translation for linear clause in omp.wsloop and omp.simd (#139386) + + This patch adds support for LLVM translation of linear clause on + omp.wsloop (except for linear modifiers). + +commit be79a0d90ff700b717e72d7c200f58c5918e4301 +Author: theRonShark +Date: Thu Dec 4 19:38:16 2025 -0500 + + Revert "[Flang][OpenMP] Add lowering support for is_device_ptr clause" (#170778) + + Reverts llvm/llvm-project#169331 + +commit a77c4948a5681984accd3c6d35fb51c1c5571a50 +Author: Akash Banerjee +Date: Thu Dec 4 15:57:24 2025 +0000 + + [Flang][OpenMP] Add lowering support for is_device_ptr clause (#169331) + + Add support for OpenMP is_device_ptr clause for target directives. + + [MLIR][OpenMP] Add OpenMPToLLVMIRTranslation support for is_device_ptr #169367 + This PR adds support for the OpenMP is_device_ptr clause in the MLIR to LLVM IR translation for target regions. The is_device_ptr clause allows device pointers (allocated via OpenMP runtime APIs) to be used directly in target regions without implicit mapping. + +commit 5ccf8c90d1e4020d5f9bc255fe521aa0763f2b2b +Author: Tom Eccles +Date: Wed Dec 3 09:36:22 2025 +0000 + + [flang] implement VECTOR VECTORLENGTH directive (#170114) + + This should match exactly the llvm attributes generated by classic + flang. 
+ +commit a09571ed5be3054b546b714c62c078b595d2f1cd +Author: jeanPerier +Date: Tue Dec 2 10:13:23 2025 +0100 + + [flang] represent ABSTRACT in fir.type_info (#170109) + + This patch keeps information about ABSTRACT derived types and DEFERRED + type bound procedures inside fir.type_info dispatch tables. + + This is part of the effort to delay generation of runtime type info + global by keeping the type information in a more condensed fashion inside + fir.type_info (which is also easier to use for any potential + optimizations). + +commit d989ff93e2a073cb921cfcfeb9728a0b51892f1a +Author: Jan Leyonberg +Date: Fri Nov 28 09:00:18 2025 -0500 + + [flang][OpenMP] Add lowering of subroutine calls in custom reduction combiners (#169808) + + This patch adds support for lowering subroutine calls in custom + reduction combiners to MLIR. + +commit fd22706e937f7d2563cfa0e433dd735cc5284599 +Author: Krzysztof Parzyszek +Date: Tue Nov 25 17:30:28 2025 -0600 + + [flang][OpenMP] Skip compiler directives in getCollapsedLoopEval (#169565) + + Use `getNestedDoConstruct` from Utils to get the nested DoConstructs. + + Fixes https://github.com/llvm/llvm-project/issues/169532 + +commit 3e86f056217afbe46cd515b3d3c2f1dc7664bebf +Author: Jan Leyonberg +Date: Mon Nov 24 16:00:46 2025 -0500 + + [OpenMP][flang] Lowering of OpenMP custom reductions to MLIR (#168417) + + This patch adds support for lowering of custom reductions to MLIR. It + also enhances the capability of the pass to automatically mark functions + as "declare target" by traversing custom reduction initializers and + combiners. + +commit c2d659b9b8efac9f80b8ebcb2b38b61295d82bdc +Author: Krzysztof Parzyszek +Date: Sat Nov 22 12:28:58 2025 -0600 + + [flang][OpenMP] Implement loop nest parser (#168884) + + Previously, loop constructs were parsed in a piece-wise manner: the + begin directive, the body, and the end directive were parsed separately. + Later on in canonicalization they were all coalesced into a loop + construct.
To facilitate that, end-loop directives were given a special + treatment, namely they were parsed as OpenMP constructs. As a result + syntax errors caused by misplaced end-loop directives were handled + differently from those caused by misplaced non-loop end directives. + + The new loop nest parser constructs the complete loop construct, + removing the need for the canonicalization step. Additionally, it is the + basis for parsing loop-sequence-associated constructs in the future. + + It also removes the need for the special treatment of end-loop + directives. While this patch temporarily degrades the error messaging + for misplaced end-loop directives, it enables uniform handling of any + misplaced end-directives in the future. + +commit 8be46410248f8298af1f12be1c52e2824ce25951 +Author: Eugene Epshteyn +Date: Fri Nov 21 12:03:51 2025 -0500 + + [flang] Use hlfir.cmpchar for SELECT CASE of charsSelect case hlfir cmpchar (#168476) + + For SELECT CASE with character selector, instead of always calling + runtime comparison function, emit hlfir.cmpchar. This has different + behaviors at different optimization levels: at -O0, it still emits + flang-rt call, but at higher optimization levels it does inline + comparison. Modify test/Lower/select-case-statement.f90 to test both + comparison cases. + +commit f4ebee0ca980f807de32841288b3785dadbc471d +Author: Ferran Toda +Date: Fri Nov 21 15:16:30 2025 +0100 + + [Flang][OpenMP] Add semantic support for Loop Sequences and OpenMP loop fuse (#161213) + + This patch adds semantics for the `omp fuse` directive in flang, as + specified in OpenMP 6.0. This patch also enables semantic support for + loop sequences which are needed for the fuse directive along with + semantics for the `looprange` clause. These changes are only semantic. + Relevant tests have been added, and previous behavior is retained with + no changes.
+ + --------- + + Co-authored-by: Ferran Toda + Co-authored-by: Krzysztof Parzyszek + +commit d69320e775a7c4af8f6e6bb6bd0574ead8e3d69c +Author: Krzysztof Parzyszek +Date: Fri Nov 21 07:40:44 2025 -0600 + + [OpenMP] Introduce "loop sequence" as directive association (#168934) + + OpenMP 6.0 introduced a `fuse` directive, and with it a "loop sequence" + as the associated code. What used to be "loop association" has become + "loop-nest association". + + Rename Association::Loop to LoopNest, add Association::LoopSeq to + represent the "loop sequence" association. + + Change the association of fuse from "block" to "loop sequence". + +commit 364fe55c42aaac63b2a28e54fa4e31cc6efcf4a8 +Author: jeanPerier +Date: Thu Nov 20 15:37:53 2025 +0100 + + [flang] simplify pointer assignments (#168732) + + Pointer assignment lowering was done in different ways depending on + contexts and types, sometimes still using runtime calls when this is not + needed and the complexity of doing this inline is very limited (the + pointer and target descriptors were already prepared inline, the runtime + is just doing the descriptor assignment and ensuring the pointer + descriptor keeps its pointer flag). + + Slightly extend the inline version that was used for Forall and use it + for all cases. + When lowering without HLFIR is removed, this will allow removing more + code. + +commit 8c674f04aa57766bbc7fac97c1e42526b22a95a4 +Author: Akash Banerjee +Date: Mon Nov 17 17:18:12 2025 +0000 + + [OpenMP][Flang] Change the OmpDefaultMapperName suffix (#168399) + + This PR fixes a Fortran syntax violation in the OpenMP default mapper + naming convention. The suffix .omp.default.mapper contains dots which + are invalid in Fortran identifiers, causing failures when mappers are + written to and read from module files. The fix changes the suffix to + _omp_default_mapper which uses underscores instead of dots, complying + with Fortran syntax rules.
+ + Key changes: + + - Changed OmpDefaultMapperName constant from .omp.default.mapper to + _omp_default_mapper + - Added GetUltimate() calls in mapper symbol resolution to properly + handle symbols across module boundaries + - Added new test case verifying default mappers work correctly when + defined in a module and used in consuming programs + + This fixes #168336. + +commit e70e9ec3b83757761ccbba217a566d77b561ec53 +Author: Krzysztof Parzyszek +Date: Mon Nov 17 08:02:36 2025 -0600 + + [flang][OpenMP] Store Block in OpenMPLoopConstruct, add access functions (#168078) + + Instead of storing a variant with specific types, store parser::Block as + the body. Add two access functions to make the traversal of the nest + simpler. + + This will allow storing loop-nest sequences in the future. + +commit 8aa7d823b0cba96e54d4d73539df4b82c3b401b9 +Author: Akash Banerjee +Date: Fri Nov 14 15:59:48 2025 +0000 + + [OpenMP][Flang] Emit default declare mappers implicitly for derived types (#140562) + + This patch adds support to emit default declare mappers for implicit + mapping of derived types when not supplied by user. This especially + helps tackle mapping of allocatables of derived types. + +commit 3b83e7fa4ec18991a25d49741418e2b3d325692c +Author: Jean-Didier PAILLEUX +Date: Fri Nov 14 14:06:46 2025 +0100 + + [flang] Implement !DIR$ IVDEP directive (#133728) + + This directive tells the compiler to ignore vector dependencies in the + following loop and it must be placed before a `do loop`. + + Sometimes the compiler may not have sufficient information to decide + whether a particular loop is vectorizable due to potential dependencies + between iterations and the directive is here to tell to the compiler + that vectorization is safe with `parallelAccesses` metadata. 
+ + This directive is also equivalent to `#pragma clang loop assume(safety)` + in C++ + +commit 056f744789ce3cc0b2e3ac1451f5bb32cc6e133e +Author: Eugene Epshteyn +Date: Fri Nov 14 07:05:08 2025 -0500 + + [flang] Removed old option -fdebug-dump-pre-fir (#168008) + + This option has long been replaced by `-fc1 -fdebug-dump-pft`. Removed + the old option and updated one test that still used it. + +commit 833ffa54f25f4e6716bfd95920a08c6c8abf4b56 +Author: Akash Banerjee +Date: Thu Nov 13 19:21:34 2025 +0000 + + [Flang][OpenMP] Update declare mapper lookup via use-module (#167903) + +commit e1324a93778624661345229f3acfe258bc495d95 +Author: Akash Banerjee +Date: Thu Nov 13 16:05:33 2025 +0000 + + Revert "[Flang][OpenMP] Update declare mapper lookup via use-module" (#167896) + + Reverts llvm/llvm-project#163860 + +commit bb5f3a08b6ee7baeab6cc4635a9240a8b9dbeb9e +Author: Akash Banerjee +Date: Thu Nov 13 15:07:46 2025 +0000 + + [Flang][OpenMP] Update declare mapper lookup via use-module (#163860) + + - Implemented semantic TODO to catch undeclared mappers. + - Fix mapper lookup to include modules imported through USE. + - Update and add tests. + + Fixes #163385. + +commit 7838dbee3a307cd8bd129ee8dbb998209133bffe +Author: Jack Styles +Date: Wed Nov 12 13:15:34 2025 +0000 + + [Flang][OpenMP] Add Lowering support for Collapse with Taskloop (#166791) + + Support for lowering collapse already exists within + `genLoopNestClauses`, which is called when lowering taskloop. However, + the TODO message still included the Collapse clause, so it was not + activated. By removing this, it enables lowering of the Collapse clause + in taskloop. + +commit cfc56c982fe144455db25f20576c9297fc68a8f9 +Author: Abid Qadeer +Date: Wed Nov 12 10:21:32 2025 +0000 + + [flang][debug] Track dummy argument positions explicitly. (#167489) + + CHARACTER dummy arguments were treated as local variables in debug info. + This happened because our method to get the argument number was not + robust. 
It relied on `DeclareOp` having a direct reference to arguments + which was not the case for character arguments. This is fixed by storing + source-level argument positions in `DeclareOp`. + + Fixes #112886 + +commit d02a5ae10bab57be80bd460d10e6bb30959bdacc +Author: Jean-Didier PAILLEUX +Date: Wed Nov 12 09:40:04 2025 +0100 + + [flang] Adding lowering of TEAMs features to PRIF in MIF Dialect (#165573) + + Support for multi-image features has begun to be integrated into LLVM + with the MIF dialect. + In this PR, you will find lowering and operations related to the TEAM + features (`SYNC TEAM`, `GET_TEAM`, `FORM TEAM`, `CHANGE TEAM`, + `TEAM_NUMBER`). + + Note regarding the operation for `CHANGE TEAM` : This operation is + partial because it does not support the associated list of coarrays + because the allocation of a coarray and the lowering of PRIF's + `prif_alias_{create|destroy}` procedures are not yet supported in Flang. + This will be integrated later. + + Any feedback is welcome. + +commit eb614cda37bdf14d5371f6b41a475c68c9a7fdec +Author: Kaviya Rajendiran <67495422+kaviya2510@users.noreply.github.com> +Date: Tue Nov 11 21:02:58 2025 +0530 + + [Flang][OpenMP][MLIR] Lowering of reduction,inreduction, nogroup and lastprivate clause to MLIR (#166751) + + This patch add MLIR lowering support for nogroup, reduction, inreduction + and lastprivate clauses of taskloop directive. + +commit faf9ac0f6fc284e26515c55787cefd6ec807ab36 +Author: Kaviya Rajendiran <67495422+kaviya2510@users.noreply.github.com> +Date: Tue Nov 11 09:49:14 2025 +0530 + + [Flang][MLIR][OpenMP] Add MLIR lowering support for taskloop clauses. (#165851) + + This patch add MLIR lowering support for the following taskloop clauses: + + 1. Default clause + 2. Shared clause + 3. Allocate clause + 4. Final clause + 5. If clause + 6. Mergeable clause + 7. Priority clause + 8. 
Untied clause + +commit 86fa018a1dca99a1a199e1a0e6f5730546198824 +Author: agozillon +Date: Tue Nov 11 03:15:58 2025 +0100 + + [Flang][OpenMP] Initial defaultmap(none) implementation (#166715) + + This PR adds defaultmap(none) behaviour to Flang, where we emit a + semantic error if variables within the target construct do not have an + associated data attribute. Similar to the way default behaves, as + described by the OpenMP specification. + +commit cf1f871023e432837581b84c8563f3b0690dd9d3 +Author: Thirumalai Shaktivel <74826228+Thirumalai-Shaktivel@users.noreply.github.com> +Date: Mon Nov 10 09:44:22 2025 +0530 + + [Flang] Add parser support for prefetch directive (#139702) + + Implementation details: + * Recognize prefetch directive in the parser as `!dir$ prefetch ...` + * Unparse the prefetch directive + * Add required tests + + Details on the prefetch directive: + `!dir$ prefetch designator[, designator]...`, where the designator list + can be a variable or an array reference. This directive is used to + insert a hint to the code generator to prefetch instructions for + memory references. + +commit 3d3fab17f5ea8a14eb390f53075c094f5e1f19fa +Author: Krzysztof Parzyszek +Date: Mon Nov 3 07:37:13 2025 -0600 + + [flang][OpenMP] Use OmpDirectiveSpecification in ALLOCATE (#165865) + + The ALLOCATE directive has two forms: + - A declarative form with a standalone directive: + ``` + !$OMP ALLOCATE (variable-list-item...) + ``` + - An executable form that consists of several directives followed by an + ALLOCATE statement: + ``` + !$OMP ALLOCATE (variable-list-item...) + !$OMP ALLOCATE (variable-list-item...) + ... + ALLOCATE (...) + ``` + + The second form was deprecated in OpenMP 5.2 in favor of the ALLOCATORS + construct. + + Since in the parse tree every type corresponding to a directive only + corresponds to a single directive, the executable form is represented by + a sequence of nested OmpAllocateDirectives, e.g. 
+ ``` + !$OMP ALLOCATE(x) + !$OMP ALLOCATE(y) + ALLOCATE(x, y) + ``` + will become + ``` + OmpAllocateDirective + |- ALLOCATE(x) // begin directive + `- OmpAllocateDirective // block + |- ALLOCATE(y) // begin directive + `- ALLOCATE(x, y) // block + ``` + With this change all AST nodes for directives use + OmpDirectiveSpecification as the directive representation. + +commit 63e45ef1bfc0208048fcd9d4264e4e94f508733f +Author: Daniel Chen +Date: Fri Oct 31 09:49:50 2025 -0400 + + To fix polymorphic pointer assignment in FORALL when LHS is unlimited polymorphic and RHS is intrinsic type target (#164999) + + Fixes #143569. + +commit c1779f33bdada6e478e882cc23a647ef9abaad96 +Author: Jean-Didier PAILLEUX +Date: Tue Oct 28 08:02:15 2025 +0100 + + [flang] Implement !DIR$ [NO]INLINE and FORCEINLINE directives (#134350) + + This patch adds the support of these two directives : `!dir$ inline` and + `!dir$ noinline`. + - `!dir$ noinline` tells to the compiler to not perform inlining on + specific function calls by adding the `noinline` metadata on the call. + - `!dir$ inline` tells to the compiler to attempt inlining on specific + function calls by adding the `inlinehint` metadata on the call. + - `!dir$ forceinline` tells to the compiler to always perfom inlining on + specific function calls by adding the `alwaysinline` metadata on the + call. + + Currently, these directives can be placed before a `DO LOOP`, call + functions or assignments. Maybe other statements can be added in the + future if needed. + + For the `inline` directive the correct name might be `forceinline` but + I'm not sure ? + +commit 23ead476550a667d532554e966704494173fd9d7 +Author: Jakub Kuderski +Date: Wed Oct 22 12:47:48 2025 -0400 + + [flang][mlir] Migrate to free create functions. NFC. (#164657) + + See + https://discourse.llvm.org/t/psa-opty-create-now-with-100-more-tab-complete/87339. + + I plan to mark these as deprecated in + https://github.com/llvm/llvm-project/pull/164649. 
+ +commit 2dbe9592663a701546efd1ec1396417629542e4b +Author: Daniel Chen +Date: Wed Oct 22 10:24:39 2025 -0400 + + Get the BoxType from the RHS instead of LHS for polymorphic pointer assignment inside FORALL. (#164279) + + Fixes #153220 + +commit c9fb37c75f741f1179f2d2c661d27d36645b0310 +Author: jeanPerier +Date: Wed Oct 22 11:46:18 2025 +0200 + + [flang][FIR] add fir.assumed_size_extent to abstract assumed-size extent encoding (#164452) + + The purpose of this patch is to allow converting FIR array representation to + memref when possible without hitting memref verifier issue. + + The issue was that FIR arrays may be assumed size, in which case the + last dimension will not be known at runtime. Flang uses -1 to encode + this to fulfill Fortran 2023 standard requirements in 18.5.3 point 5 + about CFI_desc_t. + + When arrays are converted to memref, if this `-1` reaches memref + operations, it triggers verifier errors (even if the conversion happened + in code that guards the code to be entered at runtime if the array is + assumed-size because folders/verifiers do not take into account + reachability). + + This follows-up on discussions in #163505 merge requests + +commit f2b20d3410e4c0cc3be4a5b69e00120cab9f1d5e +Author: agozillon +Date: Tue Oct 21 21:54:25 2025 +0200 + + [Flang][OpenMP][Dialect] Swap to using MLIR dialect enum to encode map flags (#164043) + + This PR shifts from using the LLVM OpenMP enumerator bit flags to an + OpenMP dialect specific enumerator. This allows us to better represent + map types that wouldn't be of interest to the LLVM backend and runtime + in the dialect. + + Primarily things like + ref_ptr/ref_ptee/ref_ptr_ptee/attach_none/attach_always/attach_auto which + are of interest to the compiler for certain transformations (primarily + in the FIR transformation passes dealing with mapping), but the runtime + has no need to know about them. 
It also means if another OpenMP + implementation comes along they won't need to stick to the same bit flag + system LLVM chose/do leg work to address it. + +commit 5cd9f0f655ac2ab9da4fbd049fbcba6eb0d793b9 +Author: Peter Klausler +Date: Mon Oct 20 13:20:33 2025 -0700 + + [flang] Move parse tree tool to Parser/tools.h (#163998) + + Move the parse tree utility function + semantics::getDesignatorNameIfDataRef to Parser/tools.h and rename it to + comply with the local style. + +commit 7d25ba39c8ac4a08c30620463bdc5f586b43c1cd +Author: Kazu Hirata +Date: Fri Oct 17 07:27:28 2025 -0700 + + [flang] Replace LLVM_ATTRIBUTE_UNUSED with [[maybe_unused]] (NFC) (#163916) + + This patch replaces LLVM_ATTRIBUTE_UNUSED with [[maybe_unused]], + introduced as part of C++17. + +commit e55071b157870d6e046e6bb315a449a2445c7e41 +Author: Valentin Clement (バレンタイン クレメン) +Date: Thu Oct 16 10:33:50 2025 -1000 + + [flang][cuda] Extent detection of data transfer with conversion (#163852) + +commit d55879de50024a29bc5533337f3cc7d8553d6cae +Author: Krzysztof Parzyszek +Date: Thu Oct 16 07:02:49 2025 -0500 + + [flang][OpenMP] Emit requirements in module files (#163449) + + For each program unit, collect the set of requirements from REQUIRES + directives in the source, and modules used by the program unit, and add + them to the details of the program unit symbol. + + The requirements in the symbol details are now stored as clauses. Since + requirements need to be emitted in the module files as OpenMP + directives, this makes the clause emission straightforward via + getOpenMPClauseName. + + For each program unit, including modules, the corresponding symbol will have + the transitive closure of the requirements for everything contained or + used in that program unit. 
+ +commit 7eee67202378932d03331ad04e7d07ed4d988381 +Author: Kelvin Li +Date: Sat Oct 11 09:48:02 2025 -0400 + + [flang] Fix build breakage with FLANG_ENABLE_WERROR on (NFC) (#162894) + +commit de55329b6ed7184771fb036e52475a63ebc67c97 +Author: jeanPerier +Date: Fri Oct 10 11:19:54 2025 +0200 + + [flang][NFC] update createTempFromMold interface to return a bool (#162680) + + Some createTempFromMold users are looking for a compile time constant + for the `mustFree`. Instead of having them retrieving it, update the + interface to return a bool. The only users that needs a value was + `packageBufferizedExpr` and it has an overload that accept bool too. + + Tests are updated to reflect that this changes the place where the + boolean is created in BufferizeHLFIR, and just removes its creation in + contexts it is not needed. + +commit 6a02c0f2fa696b226e9d3c5250802cc045985876 +Author: Alexey Bataev +Date: Thu Oct 9 14:32:10 2025 -0400 + + [Flang]Fix propagation of loop collapse number for target-based directives (#162707) + +commit 5873d6a371655d93df8221c92a51030a4619f84f +Author: Susan Tan (ス-ザン タン) +Date: Thu Oct 9 12:50:24 2025 -0400 + + [flang][openacc] Add support for force clause for loop collapse (#162534) + + Currently the force clause `collapse (force:num_level)` is NYI. Added + support to sink any prologue and epilogue code to the inner most level + as specified. + +commit 121026b186687ea00e5f792ea84d43b09597088a +Author: jeanPerier +Date: Thu Oct 9 14:26:41 2025 +0200 + + [flang][openacc] map data operand results to symbols inside compute region (#162306) + + Variable references inside OpenACC compute and loop region were + currently always lowered to usages of the same SSA values than in the + host thread, even for variables that appear in data clauses and for + which acc data operations are created. 
+ + This makes it a non-trivial task to identify implicit data usages vs + usage of data appearing in clauses because the SSA addresses used in the + region may have a non-trivial SSA relationship with the SSA addresses + used as inputs of the data operations, especially after CSE runs that + may merge component or array element addressing operations with similar + addressing on the host thread (fir.coordinate/hlfir.designate). + + This patch updates OpenACC lowering to remap the Symbol that appear in + data clauses to the related acc data operation result for the scope of + the compute or loop region. + + To allow FIR passes to reason about these addresses, a new hlfir.declare + operation is created with the acc data operation result. This gives + access to the shape, contiguity, attributes, and dummy argument + relationships inside the region without having FIR extended to + understand the data operations. + +commit 375f48942b9a3f3fbd82133390af25b6c96f1460 +Author: Michael Kruse +Date: Fri Oct 3 15:52:48 2025 +0200 + + [Flang] Add standalone tile support (#160298) + + Add support for the standalone OpenMP tile construct: + ```f90 + !$omp tile sizes(...) + DO i = 1, 100 + ... + ``` + + This is complementary to #143715 which added support for the tile + construct as part of another loop-associated construct such as + worksharing-loop, distribute, etc. + +commit c242aff2452fb662a7ea23954abe654b51182b8e +Author: Valentin Clement (バレンタイン クレメン) +Date: Thu Oct 2 04:43:45 2025 -1000 + + [flang][cuda][openacc] Create new symbol in host_data region for CUDA Fortran interop (#161613) + +commit 727aad15f0a897826fc9102b5a090b977c554097 +Author: Krzysztof Parzyszek +Date: Fri Sep 26 15:47:31 2025 -0500 + + [flang][OpenMP] Use OmpDirectiveSpecification in DECLARE_TARGET (#160573) + +commit d6e20c42c1f916fa925f0b1c2c37f3098ada1686 +Author: Slava Zakharin +Date: Fri Sep 26 09:40:04 2025 -0700 + + [flang] Clean-up for fir.do_loop generation in lowering. 
(#160630) + + This patch changes two things: + 1. We do not need to use the loop counter's last value + for regular do-loops in Lowering. + 2. The loop counter's increment is implied by fir.do_loop + operation, so there is no need to increment it explicitly. + + The last point has been especially confusing to me, because it was + unclear why we have an explicit increment if it is implied. + It looks like CFGConversion somehow still makes the final code + correct, i.e. the counter is not incremented twice. + Anyway, the new lowering should look more concise. + +commit 3ca59104cfe9c47ef64ce44491e7f0c1fbc9f788 +Author: Krzysztof Parzyszek +Date: Tue Sep 23 08:50:15 2025 -0500 + + [flang][OpenMP] Use OmpDirectiveSpecification in DECLARE_MAPPER (#160169) + +commit 8c189327e5573e597b3eead418beab6aaea72ca3 +Author: Krzysztof Parzyszek +Date: Fri Sep 19 10:50:23 2025 -0500 + + [flang][OpenMP] Use OmpDirectiveSpecification in SECTIONS (#159580) + +commit e75e28ad3c9558c2cca32cd16cd5681b5219ff8d +Author: Krzysztof Parzyszek +Date: Tue Sep 16 11:38:03 2025 -0500 + + [flang][OpenMP] Use OmpDirectiveSpecification in Omp[Begin|End]LoopDi… (#159087) + + …rective + + This makes accessing directive components, such as directive name or the + list of clauses simpler and more uniform across different directives. It + also makes the parser simpler, since it reuses existing parsing + functionality. + + The changes are scattered over a number of files, but they all share the + same nature: + - getting the begin/end directive from OpenMPLoopConstruct, + - getting the llvm::omp::Directive enum, and the source location, + - getting the clause list. + +commit 5365f8bc91d2d820092e904ecab21b841e3f5abb +Author: Akash Banerjee +Date: Mon Sep 15 16:11:55 2025 +0100 + + Revert "[NFC][Flang] Move bounds helper functions to Util header." 
(#158654) + + Reverts llvm/llvm-project#154164 + +commit 32ab6ff9f95739cba9954e666479d30e126af53c +Author: Akash Banerjee +Date: Mon Sep 15 15:45:49 2025 +0100 + + [NFC][Flang] Move bounds helper functions to Util header. (#154164) + + This PR moves the `needsBoundsOps` and `genBoundsOps` helper functions + to `flang/include/flang/Optimizer/OpenMP/Utils.h`. + +commit 5149e51cb25d6a68365ca3bd9300cff1b18213e2 +Author: Jean-Didier PAILLEUX +Date: Fri Sep 12 15:29:21 2025 +0200 + + [flang][Lower] Add lowering to SYNC ALL, SYNC MEMORY and SYNC IMAGES to PRIF (#154166) + + In relation to the approval and merge of the + https://github.com/llvm/llvm-project/pull/76088 specification about + multi-image features in Flang. + Here is a PR on adding support for SYNC ALL, SYNC MEMORY and SYNC IMAGES + in conformance with the PRIF specification. + + --------- + + Co-authored-by: Katherine Rasmussen + +commit d452e67ee7b5d17aa040f71d8997abc1a47750e4 +Author: Jan Leyonberg +Date: Wed Sep 10 09:25:40 2025 -0400 + + [flang][OpenMP] Enable tiling (#143715) + + This patch enables tiling in flang. In MLIR tiling is handled by + changing the omp.loop_nest op to be able to represent both collapse + and tiling, so the flang front-end will combine the nested constructs into + a single MLIR op. The MLIR->LLVM-IR lowering of the LoopNestOp is + enhanced to first do the tiling if present, then collapse. + +commit 2a2296b1aab4614bf6c95c3003000832c9d43de5 +Author: Razvan Lupusoru +Date: Tue Sep 9 13:09:04 2025 -0700 + + [flang][acc] Fix incorrect loop body nesting and IV value use (#157708) + + Two issues are being resolved: + - Incorrect loop body nesting caused by insertion point not being + updated after the loop. The scenario is now being tested through + `nested_do_loops` function in the test. + - Incorrect IV ssa values due to incorrect handling of scoping. 
+ + Additionally, this also adds `--openacc-do-loop-to-acc-loop` flag so + that the implicit conversion can be disabled for testing. + +commit c85e6ac74d7ccb36dfaaf94989f27c22cf5c7e7c +Author: Kareem Ergawy +Date: Mon Sep 8 14:33:00 2025 +0200 + + [NFC][flang][OpenMP] Extract target region utils to map or clone outside values (#155754) + + Following up on #154483, this PR introduces further refactoring to + extract some shared utils between OpenMP lowering and `do concurrent` + conversion pass. In particular, this PR extracts 2 utils that handle + mapping or cloning values used inside target regions but defined + outside. + + Later `do concurrent` PR(s) will also use these utils. + + PR stack: + - https://github.com/llvm/llvm-project/pull/155754 ◀️ + - https://github.com/llvm/llvm-project/pull/155987 + - https://github.com/llvm/llvm-project/pull/155992 + - https://github.com/llvm/llvm-project/pull/155993 + - https://github.com/llvm/llvm-project/pull/156589 + - https://github.com/llvm/llvm-project/pull/156610 + - https://github.com/llvm/llvm-project/pull/156837 + +commit 83da8d08ff110fd5bf3fff17043420ba442e300c +Author: Slava Zakharin +Date: Thu Sep 4 15:49:11 2025 -0700 + + [flang] Attach proper storage to [hl]fir.declare in lowering. (#155742) + + As described in + https://discourse.llvm.org/t/rfc-flang-representation-for-objects-inside-physical-storage/88026, + `[hl]fir.declare` should carry information about the layout + of COMMON/EQUIVALENCE variables within the physical storage. + + This patch modifies Flang lowering to attach this information. + +commit 88b71e20488ae0987b7ec7cfa9d49d9358b1f38c +Author: Krzysztof Parzyszek +Date: Fri Aug 29 07:37:48 2025 -0500 + + [flang][OpenMP] Replace OpenMPBlockConstruct with OmpBlockConstruct (#155872) + + OpenMPBlockConstruct, somewhat confusingly, represents most but not all + block-associated constructs. It's derived from OmpBlockConstruct, as are + all the remaining block-associated constructs. 
+ + It does not correspond to any well-defined group of constructs. It's the + collection of constructs that don't have their own types (and those that + do have their own types do so for their own reasons). + + Using the broader OmpBlockConstruct in type-based visitors won't cause + issues, because the specific overloads (for classes derived from it) + will always be preferred. + +commit 9cf8752ccfd194c3fa1cda641db2e3c77aa4915c +Author: Kajetan Puchalski +Date: Thu Aug 28 15:58:29 2025 +0100 + + [flang][OpenMP] Handle symbols on composite simd with multiple privatizers (#155640) + + In some cases, a clause on a composite simd construct applied to simd + can be using a symbol that is also used by another privatizer, not + applied to simd. Correctly handle this scenario by checking which + directive the privatizer is being generated for while determining + whether to emit the copy region. + + Fixes #155195. + + Signed-off-by: Kajetan Puchalski + +commit 86e4c175e3de73bf529b6849614fefcb4b8d7011 +Author: Kareem Ergawy +Date: Wed Aug 27 18:25:36 2025 +0200 + + [NFC][flang][OpenMP] Create `FortranUtils` lib and move `createMapInfoOp` to it (#154483) + +commit 044e1aabbd4b92a2e05a52e9a1630c2fe548d358 +Author: Tom Eccles +Date: Tue Aug 26 11:45:56 2025 +0100 + + [flang][OpenMP] move omp end sections validation to semantics (#154740) + + See #90452. The old parse tree errors exploded to thousands of unhelpful + lines when there were multiple missing end directives. + + Instead, allow a missing end directive in the parse tree then validate + that it is present during semantics (where the error messages are a lot + easier to control). + +commit 21019a3c11b56776809ec65af35b050f58570b77 +Author: Chaitanya +Date: Tue Aug 26 09:30:21 2025 +0530 + + [flang][openmp] Add Lowering to omp mlir for workdistribute construct (#154378) + + This PR adds lowering of workdistribute construct in flang to omp mlir dialect workdistribute op. 
+ + The work in this PR is c-p and updated from @ivanradanov commits from coexecute implementation: + flang_workdistribute_iwomp_2024 + +commit 8a5b6b302eb55e514f086ffcee3cf4f81750bb5a +Author: Kazu Hirata +Date: Wed Aug 20 16:30:24 2025 -0700 + + [flang] Use SmallPtrSet directly instead of SmallSet (NFC) (#154471) + + I'm trying to remove the redirection in SmallSet.h: + + template + class SmallSet : public SmallPtrSet + {}; + + to make it clear that we are using SmallPtrSet. There are only + handful places that rely on this redirection. + + This patch replaces SmallSet to SmallPtrSet where the element type is + a pointer. + +commit 42350f428db0d053610a82a747eb240afc4d9250 +Author: Krzysztof Parzyszek +Date: Tue Aug 19 08:32:43 2025 -0500 + + [flang][OpenMP] Parse GROUPPRIVATE directive (#153807) + + No semantic checks or lowering yet. + +commit 0e93dbc6b1cac9c69c546cff7b5dd5935917ae9e +Author: Kareem Ergawy +Date: Tue Aug 19 12:07:17 2025 +0200 + + [flang] `do concurrent`: Enable delayed localization by default (#154303) + + Enables delayed localization by default for `do concurrent`. Tested both + gfortran and Fujitsu test suites. + + All tests pass for gfortran tests. For Fujitsu, enabled delayed + localization passes more tests: + + Delayed localization disabled: + Testing Time: 7251.76s + Passed : 88520 + Failed : 162 + Executable Missing: 408 + + Delayed localization enabled: + Testing Time: 7216.73s + Passed : 88522 + Failed : 160 + Executable Missing: 408 + +commit acdbb00af5d0b6469fceb8abb26634de2dbee985 +Author: Jean-Didier PAILLEUX +Date: Sat Aug 16 01:04:49 2025 +0200 + + [flang] Adding support of -fcoarray flang and init PRIF (#151675) + + In relation to the approval and merge of the + [PRIF](https://github.com/llvm/llvm-project/pull/76088) specification + about multi-image features in Flang, here is a first PR to add support + for the `-fcoarray` compilation flag and the initialization of the PRIF + environment. 
+ Other PRs will follow for adding support of lowering to PRIF. + +commit b9e33fd49386a4be569e7d579c24e0e2a9607943 +Author: Kareem Ergawy +Date: Fri Aug 15 08:45:02 2025 +0200 + + [flang] Do not re-localize loop ivs when nested inside `block`s (#153350) + + Consider the following example: + ```fortran + implicit none + integer :: i, j + + do concurrent (i=1:10) local(j) + block + do j=1,20 + end do + end block + end do + ``` + + Without the fix introduced in this PR, the compiler would "re-localize" + the `j` variable inside the `fir.do_concurrent` loop: + ```mlir + fir.do_concurrent { + %7 = fir.alloca i32 {bindc_name = "j"} + %8:2 = hlfir.declare %7 {uniq_name = "_QFloop_in_nested_blockEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ... + fir.do_concurrent.loop (%arg0) = (%5) to (%6) step (%c1) local(@_QFloop_in_nested_blockEj_private_i32 %4#0 -> %arg1 : !fir.ref) { + %12:2 = hlfir.declare %arg1 {uniq_name = "_QFloop_in_nested_blockEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ... + %17:2 = fir.do_loop %arg2 = %14 to %15 step %c1_1 iter_args(%arg3 = %16) -> (index, i32) { + fir.store %arg3 to %8#0 : !fir.ref + ... + } + } + } + ``` + + This happened because we did a shallow look-up of `j` and since the loop + is nested inside a `block`, the look-up failed and we re-created a local + allocation for `j` inside the parent `fir.do_concurrent` loop. This + means that we ended up not using the actual localized symbol which is + passed as a region argument to the `fir.do_concurrent.loop` op. + + In case of `j`, we do not need to do a shallow look-up. The shallow + look-up is only needed if a symbol is an OpenMP private one or an + iteration variable of a `do concurrent` loop. Neither of which applies + to `j`. + + With the fix, `j` is properly resolved to the `local` region argument: + ```mlir + fir.do_concurrent { + ... + fir.do_concurrent.loop (%arg0) = (%5) to (%6) step (%c1) local(@_QFloop_in_nested_blockEj_private_i32 %4#0 -> %arg1 : !fir.ref) { + ... 
+ %10:2 = hlfir.declare %arg1 {uniq_name = "_QFloop_in_nested_blockEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ... + %15:2 = fir.do_loop %arg2 = %12 to %13 step %c1_1 iter_args(%arg3 = %14) -> (index, i32) { + fir.store %arg3 to %10#0 : !fir.ref + ... + } + } + } + ``` + +commit d3d96e20573771c9b0f54a07c1557c448b1d9ae1 +Author: Kajetan Puchalski +Date: Thu Aug 14 14:20:15 2025 +0100 + + [flang][OpenMP] Add -f[no]-openmp-simd (#150269) + + Both clang and gfortran support the -fopenmp-simd flag, which enables + OpenMP support only for simd constructs, while disabling the rest of + OpenMP. + + Implement the appropriate parse tree rewriting to remove non-SIMD OpenMP + constructs at the parsing stage. + + Add a new SimdOnly flang OpenMP IR pass which rewrites generated OpenMP + FIR to handle untangling composite simd constructs, and clean up OpenMP + operations leftover after the parse tree rewriting stage. + With this approach, the two parts of the logic required to make the flag + work can be self-contained within the parse tree rewriter and the MLIR + pass, respectively. It does not need to be implemented within the core + lowering logic itself. + + The flag is expected to have no effect if -fopenmp is passed explicitly, + and is only expected to remove OpenMP constructs, not things like OpenMP + library functions calls. This matches the behaviour of other compilers. + + --------- + + Signed-off-by: Kajetan Puchalski + +commit a2899c457ecac9f2511fa08926bcf1c22eee1d14 +Author: Valentin Clement (バレンタイン クレメン) +Date: Wed Aug 13 10:55:15 2025 -0700 + + [flang][cuda] Support data transfer with conversion (#153242) + + When the rhs of the data transfer is from a different type, allocate a + new temp on the host and first transfer the rhs to it. Then, use the + elemental op created to do the conversion. 
+ +commit 3b10b9a2b03a2954d9da54c0f1137daeb9e339c4 +Author: Akash Banerjee +Date: Mon Aug 11 12:45:22 2025 +0100 + + [MLIR][OpenMP] Add lowering support for AUTOMAP modifier (#151513) + + Add Automap modifier to the MLIR op definition for the DeclareTarget + directive's Enter clause. Also add lowering support in Flang. + + Automap Ref: OpenMP 6.0 section 7.9.7. + +commit d7d0d7a80fc343750bbf85ea8c184737d9c70f62 +Author: Kareem Ergawy +Date: Fri Aug 8 06:22:53 2025 +0200 + + [flang] Skip processing reductions for unstructured `do concurrent` loops (#150188) + + Fixes #149563 + + When emitting unstructured `do concurrent` loops, reduction processing + should be skipped since we are not emitting `fir.do_concurrent` loop in + the first place. + +commit e368b5343d037c89051097c2a87a6fb76548014e +Author: Krzysztof Parzyszek +Date: Thu Aug 7 08:10:25 2025 -0500 + + [flang][OpenMP] Make OpenMPCriticalConstruct follow block structure (#152007) + + This allows not having the END CRITICAL directive in certain situations. + Update semantic checks and symbol resolution. + +commit eb0ddba26b6a265b44b442ae666db43b9f28b26a +Author: Valentin Clement (バレンタイン クレメン) +Date: Wed Aug 6 21:49:55 2025 -0700 + + Reland "[flang][cuda] Set the allocator of derived type component after allocation" (#152418) + + Reviewed in #152379 + - Move the allocator index set up after the allocate statement otherwise + the derived type descriptor is not allocated. + - Support array of derived-type with device component + +commit 2696e8c1499682f0b1f357d9035ed59f544892f8 +Author: Valentin Clement (バレンタイン クレメン) +Date: Wed Aug 6 18:49:52 2025 -0700 + + [flang][cuda] Remove too restrictive assert for data transfer (#152398) + + When the rhs is a an array element, the assert was triggered but this is + still a valid transfer. Remove the assert. The operation has a verifier + to check its validity. 
+ +commit 7d3134f6cc59f47460646a13abcf824bae05d772 +Author: Valentin Clement (バレンタイン クレメン) +Date: Wed Aug 6 15:55:53 2025 -0700 + + Revert "[flang][cuda] Set the allocator of derived type component after allocation" (#152402) + + Reverts llvm/llvm-project#152379 + + Buildbot failure + https://lab.llvm.org/buildbot/#/builders/207/builds/4905 + +commit d897355876287e410d35f1f0ac74d79955d50dd4 +Author: Valentin Clement (バレンタイン クレメン) +Date: Wed Aug 6 15:14:00 2025 -0700 + + [flang][cuda] Set the allocator of derived type component after allocation (#152379) + + - Move the allocator index set up after the allocate statement otherwise + the derived type descriptor is not allocated. + - Support array of derived-type with device component + +commit 3847620ba9a22a13de30bd77d059aae6f484dd94 +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue Aug 5 07:27:43 2025 -0700 + + [flang][NFC] Move the rest of ops creation to new APIs (#152079) + +commit 47ef3d069bcfb8ec31c06cdd619557c84d1084ad +Author: Carlos Seo +Date: Tue Aug 5 10:53:18 2025 -0300 + + [Flang] Avoid crash when a function return is undefined (#151577) + + Properly terminate the StatementContext cleanup when a function return + value is undefined. + + Fixes #126452 + +commit cc2a385e65483688d3e4a0091e0767960f9eb8c2 +Author: agozillon +Date: Tue Aug 5 15:48:37 2025 +0200 + + [Flang][OpenMP] Make implicitly captured scalars fully firstprivatized (#147442) + + Currently, we indicate to the runtime that implicit scalar captures are + firstprivate (via map and + capture types), enough for the runtime trace to treat it as such, but we + do not CodeGen the IR + in such a way that we can take full advantage of this aspect of the + OpenMP specification. 
+ + This patch seeks to change that by applying the correct symbol flags + (firstprivate/implicit) to the + implicitly captured scalars within target regions, which then triggers + the delayed privatization code + generation for these symbols, bringing the code generation in-line with + the explicit firstprivate + clause. Currently, similarly to the delayed privatization I have + sheltered this segment of code + behind the EnabledDelayedPrivitization flag, as without it, we'll + trigger a compiler error for + firstprivate not being supported any time we implicitly capture a scalar + and try to firstprivatize + it, in future when this flag is removed it can also be removed here. So, + for now, you need to + enable this via providing the compiler the flag on compilation of any + programs. + +commit e4d3dc6359f568a9b0ac2e1010bbc7d13f4982b6 +Author: Valentin Clement (バレンタイン クレメン) +Date: Mon Aug 4 22:09:08 2025 -0700 + + [flang][NFC] Update HLFIR ops creation to the new APIs (#152075) + + See #147168 + +commit 3b23fdb35def583ae5db58576a7fcb312315879e +Author: Valentin Clement (バレンタイン クレメン) +Date: Mon Aug 4 17:53:44 2025 -0700 + + [flang][NFC] Update more FIR op creation to the new APIs (#152060) + +commit 05b52ef909475f4048e5b8cd86b3671772506682 +Author: Valentin Clement (バレンタイン クレメン) +Date: Mon Aug 4 16:09:24 2025 -0700 + + [flang][cuda][NFC] Update to the new create APIs (#152050) + + Some operation creations were updated in flang directory but not all. + Migrate the CUF ops to the new create APIs introduced in #147168 + +commit 8cc4c6d78f08ca38b5051a09a71ec14ae8931cda +Author: Tom Eccles +Date: Mon Aug 4 12:01:27 2025 +0100 + + [flang][Lower] Make reduction processing failure a hard error (#150233) + + See #150178 + + This may regress some test cases which only ever passed by accident. + + I've tested SPEC2017 and a sample of applications to check that this + doesn't break anything too obvious. 
Presumably this was not a widely + used feature or we would have noticed the bug sooner. + + I'm unsure whether this should be backported to LLVM 21 or not: I think + it is much better to refuse to compile than to silently produce the + wrong result, but there is a chance this could regress something which + previously worked by accident. Opinions welcome. + +commit 2f33b01651b1041682bab363e556ff1a396262fc +Author: Razvan Lupusoru +Date: Fri Aug 1 09:27:36 2025 -0700 + + [flang] Ensure lowering diagnostic handler does not outlive lowering (#151608) + + When the LoweringBridge is created, it registers an MLIR Diagnostics + handler with the MLIRContext. However, it never deregisters it once + lowering is finished. + + This fixes this particular scenario. It also makes it so that the + Diagnostics handler is optional. + +commit 6533ad04edcbc02d012cdb181d8745ca0d2f2e75 +Author: Krzysztof Parzyszek +Date: Fri Aug 1 07:52:59 2025 -0500 + + [flang][OpenMP] Make all block constructs share the same structure (#150956) + + The structure is + - OmpBeginDirective (aka OmpDirectiveSpecification) + - Block + - optional (aka optional) + + The OmpBeginDirective and OmpEndDirective are effectively different + names for OmpDirectiveSpecification. They exist to allow the semantic + analyses to distinguish between the beginning and the ending of a block + construct without maintaining additional context. + + The actual changes are in the parser: parse-tree.h and openmp-parser.cpp + in particular. The rest is simply changing the way the directive/clause + information is accessed (typically for the simpler). + + All standalone and block constructs now use OmpDirectiveSpecification to + store the directive/clause information. 
+ +commit a361cde4421540e7ba3d6cdae0ef6e2860e126d8 +Author: Kajetan Puchalski +Date: Fri Aug 1 13:12:57 2025 +0100 + + [flang][OpenMP] Support delayed privatisation for composite distribute simd (#151169) + + Implement the lowering for delayed privatisation for composite + "distribute simd" constructs. Fixes new crashes previously masked by simd + information on composite constructs being ignored. + + Signed-off-by: Kajetan Puchalski + +commit c0591477ac99bf8ae51ce116a6471420f128ac9f +Author: Kajetan Puchalski +Date: Fri Aug 1 13:12:21 2025 +0100 + + [flang][OpenMP] Support delayed privatisation for composite do simd (#150979) + + Implement the lowering for delayed privatisation for composite "do simd" + constructs. Fixes new crashes previously masked by simd information on + composite constructs being ignored, such as llvm#150975. + + Signed-off-by: Kajetan Puchalski + +commit 698492290540c9a75c1b8acc75f6c0fd4e468f5b +Author: Krzysztof Parzyszek +Date: Thu Jul 31 07:51:22 2025 -0500 + + [flang][OpenMP] Store directive information in OpenMPSectionConstruct (#150804) + + The OpenMPSectionConstruct corresponds to the `!$omp section` directive, + but there is nothing in the AST node that stores the directive + information. Even though the only possibility (at the moment) is + "section" without any clauses, for improved generality it is helpful to + have that information anyway. + +commit 27f777e9c06daeb03efad9230fe080df2a3a94c5 +Author: Michael Kruse +Date: Wed Jul 30 09:20:42 2025 +0200 + + [Flang][OpenMP] Skip DSA for canonical loops (#150593) + + OpenMP loop transformations do not have data-sharing attributes and do + not explicitly privatize the loop variable. The DataSharingProcessor was + still used in #144785 because `createAndSetPrivatizedLoopVar` expected + it. + + We skip that function and directly write to the loop variable. If the + loop variable is implicitly or explicitly privatized, it will be due to + surrounding OpenMP constructs such as `parallel`. 
+ +commit 4128cf3b26cff997f1f315ee571cbc7110bc250c +Author: Razvan Lupusoru +Date: Tue Jul 29 10:03:22 2025 -0700 + + [flang][acc] Lower do and do concurrent loops specially in acc regions (#149614) + + When OpenACC is enabled and Fortran loops are annotated with `acc loop`, + they are lowered to the `acc.loop` operation. And the rest of the contained + loops use the normal FIR lowering path. + + However, the OpenACC specification has special provisions related to + contained loops and their induction variable. In order to adhere to + this, we convert all valid contained loops to `acc.loop` in order to + store this information appropriately. + + The provisions in the spec that motivated this change (line numbers are + from OpenACC 3.4): + - 1353 Loop variables in Fortran do statements within a compute + construct are predetermined to be private to the thread that executes + the loop. + - 3783 When do concurrent appears without a loop construct in a kernels + construct it is treated as if it is annotated with loop auto. If it + appears in a parallel construct or an accelerator routine then it is + treated as if it is annotated with loop independent. + + By valid loops - we convert do loops and do concurrent loops which have + an induction variable. Loops which are unstructured are not handled. + +commit 9d642b0ec806d13002e2f0b50091ca9656b238e5 +Author: Anchu Rajendran S +Date: Mon Jul 28 05:46:10 2025 -0700 + + [flang][MLIR][OpenMP][llvm]Atomic Control Support (#150860) + +commit efe1aa8904ea3ad8b19ab2aa5660e27a08c7d694 +Author: Kiran Chandramohan +Date: Thu Jul 24 21:54:26 2025 +0100 + + Revert "[flang][flang-driver][mlir][OpenMP] atomic control support" (#150504) + + Reverts llvm/llvm-project#143441 + + Reverting due to CI failure + https://lab.llvm.org/buildbot/#/builders/53/builds/18055. 
+ +commit f44346dc1f6252716cfc62bb0687e3932a93089f +Author: Anchu Rajendran S +Date: Thu Jul 24 09:49:38 2025 -0700 + + [flang][flang-driver][mlir][OpenMP] atomic control support (#143441) + + Atomic Control Options are used to specify architectural characteristics + to help lowering of atomic operations. The options used are: + `-f[no-]atomic-remote-memory`, `-f[no-]atomic-fine-grained-memory`, + `-f[no-]atomic-ignore-denormal-mode`. + Legacy option `-m[no-]unsafe-fp-atomics` is aliased to + `-f[no-]ignore-denormal-mode`. + More details can be found in + https://github.com/llvm/llvm-project/pull/102569. This PR implements the + frontend support for these options with OpenMP atomic in flang. + + Backend changes are available in the draft PR: + https://github.com/llvm/llvm-project/pull/143769 which will be raised + after this merged. + +commit 1ba3859cdbf263182502b1c00546e985bdb633da +Author: Krzysztof Parzyszek +Date: Thu Jul 24 08:59:13 2025 -0500 + + [flang][OpenMP] Parse strictly- and loosely-structured blocks (#150298) + + Block-associated constructs have, as their body, either a strictly- or a + loosely-structured block. In the former case the end-directive is + optional. + + The existing parser required the end-directive to be present in all + cases. + + Note: + The definitions of these blocks in the OpenMP spec exclude cases where + the block contains more than one construct, and the first one is + BLOCK/ENDBLOCK. For example, the following is invalid: + ``` + !$omp target + block ! This cannot be a strictly-structured block, but + continue ! a loosely-structured block cannot start with + endblock ! BLOCK/ENDBLOCK + continue ! 
+ + !$omp end target + ``` + +commit 97faab7bc279516a31001621203f4ff5a158ed13 +Author: Kazu Hirata +Date: Wed Jul 23 08:33:32 2025 -0700 + + [flang] Fix a warning + + This patch fixes: + + flang/lib/Lower/Bridge.cpp:2128:10: error: unused variable 'result' + [-Werror,-Wunused-variable] + +commit fc0a978327215aa8883ae6f18d1e316f3c04520a +Author: Carlos Seo +Date: Wed Jul 23 11:16:11 2025 -0300 + + [Flang] Fix ASSIGN statement (#149941) + + Handle the case where the assigned variable also has a pointer + attribute. + + Fixes #121721 + +commit 43db6c5cc1a81b540ddca49bee197895c420ec2d +Author: Krzysztof Parzyszek +Date: Wed Jul 23 08:25:33 2025 -0500 + + [flang][OpenMP] General utility to get directive id from AST node (#150121) + + Fortran::parser::omp::GetOmpDirectiveName(t) will get the + OmpDirectiveName object that corresponds to construct t. That object (an + AST node) contains the enum id and the source information of the + directive. + + Replace uses of extractOmpDirective and getOpenMPDirectiveEnum with the + new function. + +commit 36c37b019b5daae79785e8558d693e6ec42b0ebd +Author: Kareem Ergawy +Date: Wed Jul 23 11:23:00 2025 +0200 + + [flang][OpenMP] Restore reduction processor behavior broken by #145837 (#150178) + + Fixes #149089 and #149700. + + Before #145837, when processing a reduction symbol not yet supported by + OpenMP lowering, the reduction processor would simply skip filling in + the reduction symbols and variables. With #145837, this behavior was + slightly changed because the reduction symbols are populated before + invoking the reduction processor (this is more convenient to share the + code with `do concurrent`). + + This PR restores the previous behavior. 
+ +commit 0586067cf07bef0f04fd1dc7135a9b773ebaa07a +Author: Michael Kruse +Date: Wed Jul 23 10:18:13 2025 +0200 + + [Flang] Build fix without precompiled headers + + The header semantics.h is added implitly in the precompiled headers, but + the build was failing when precompiled headers are disabled (e.g. + using CMAKE_DISABLE_PRECOMPILE_HEADERS=ON): + + ``` + ../_src/flang/lib/Semantics/canonicalize-omp.cpp: In constructor ‘Fortran::semantics::CanonicalizationOfOmp::CanonicalizationOfOmp(Fortran::semantics::SemanticsContext&)’: + ../_src/flang/lib/Semantics/canonicalize-omp.cpp:31:38: error: invalid use of incomplete type ‘class Fortran::semantics::SemanticsContext’ + 31 | : context_{context}, messages_{context.messages()} {} + | ^~~~~~~ + In file included from ../_src/flang/lib/Semantics/canonicalize-omp.cpp:9: + ../_src/flang/lib/Semantics/canonicalize-omp.h:17:7: note: forward declaration of ‘class Fortran::semantics::SemanticsContext’ + 17 | class SemanticsContext; + | ^~~~~~~~~~~~~~~~ + compilation terminated due to -fmax-errors=1. + ``` + +commit 2914a488c7f49c4817bbfb86f74da04fd338b4eb +Author: Krzysztof Parzyszek +Date: Tue Jul 22 07:37:47 2025 -0500 + + [flang][OpenMP] Sema checks, lowering with new format of MAP modifiers (#149137) + + OpenMP 6.0 has changed the modifiers on the MAP clause. Previous patch + has introduced parsing support for them. This patch introduces + processing of the new forms in semantic checks and in lowering. This + only applies to existing modifiers, which were updated in the 6.0 spec. + Any of the newly introduced modifiers (SELF and REF) are ignored. + +commit b487f9a7bd15e453a3ff7fcbfbc54e54eecf26d3 +Author: Michael Kruse +Date: Tue Jul 22 11:39:01 2025 +0200 + + [Flang] Implement !$omp unroll using omp.unroll_heuristic (#144785) + + Add support for `!$omp unroll` in Flang and basic MLIR + `omp.canonical_loop` modeling. 
+ + First step to add `omp.canonical_loop` modeling to the MLIR OpenMP + dialect with the goal of being more general than the current + `omp.loop_nest` approach: + * Support for non-perfectly nested loops + * Support for non-rectangular loops + * Support for arbitrary compositions of loop transformations + + This patch is functional end-to-end and adds support for `!$omp unroll` + to Flang. `!$omp unroll` is lowered to `omp.new_cli`, + `omp.canonical_loop`, and `omp.unroll_heuristic` in MLIR, which are + lowered to LLVM-IR using the OpenMPIRBuilder + (https://reviews.llvm.org/D107764). + +commit a3a007ad5fa20abc90ead4e1030b481bf109b4cf +Author: Maksim Levental +Date: Mon Jul 21 18:54:29 2025 -0500 + + [mlir][NFC] update `flang/Lower` create APIs (8/n) (#149912) + + See https://github.com/llvm/llvm-project/pull/147168 for more info. + +commit 2aa1e54fa1ff7f7c347e7108fe8650e94014c941 +Author: Krzysztof Parzyszek +Date: Mon Jul 21 10:55:37 2025 -0500 + + [flang][OpenMP] Parse OpenMP 6.0 map modifiers (#149134) + + OpenMP 6.0 has changed the modifiers on the MAP clause: + - map-type-modifier has been split into individual modifiers, + - map-type "delete" has become a modifier, + - new modifiers have been added. + + This patch adds parsing support for all of the OpenMP 6.0 modifiers. The + old "map-type-modifier" is retained, but is no longer created in + parsing. It will remain to take advantage of the preexisting modifier + validation for older versions: when the OpenMP version is < 6.0, the + modifiers will be rewritten back as map-type-modifiers (or map- type in + case of "delete"). + + In this patch the modifiers will always be rewritten in the older format + to isolate these changes to parsing as much as possible. 
+ +commit 9e5b2fbe86ed9b303eff779fff012d6a96574f3d +Author: Peter Klausler +Date: Fri Jul 18 13:45:05 2025 -0700 + + [flang][runtime] Preserve type when remapping monomorphic pointers (#149427) + + Pointer remappings unconditionally update the element byte size and + derived type of the pointer's descriptor. This is okay when the pointer + is polymorphic, but not when a pointer is associated with an extended + type. + + To communicate this monomorphic case to the runtime, add a new entry + point so as to not break forward binary compatibility. + +commit 151fffccf1340d8a2800664cbcaaa579ba772a4c +Author: Kazu Hirata +Date: Fri Jul 18 08:05:12 2025 -0700 + + [flang] Migrate away from ArrayRef(std::nullopt_t) (#149454) + + ArrayRef(std::nullopt_t) has been deprecated. This patch replaces + std::nullopt with mlir::TypeRange{} or mlir::ValueRange{} as + appropriate. + +commit 2a7328dacae39e87ca4cc7548b9abcdba60b946b +Author: Kazu Hirata +Date: Thu Jul 17 15:23:55 2025 -0700 + + [flang] Migrate away from ArrayRef(std::nullopt_t) (#149337) + + ArrayRef(std::nullopt_t) has been deprecated. This patch replaces + std::nullopt with {}. + + A subsequent patch will address those places where we need to replace + std::nullopt with mlir::TypeRange{} or mlir::ValueRange{}. + +commit ff5784bb9094f6035851dc7abc4a5760fdc21e45 +Author: Krzysztof Parzyszek +Date: Thu Jul 17 12:11:12 2025 -0500 + + [flang][OpenMP] Move extractOmpDirective to Utils.cpp, NFC (#148653) + +commit 7c8a197918a0c4044c1be39a26d517eea95a5ec9 +Author: Kareem Ergawy +Date: Fri Jul 11 07:42:51 2025 +0200 + + [NFC][flang] Move `ReductionProcessor` to `Lower/Support`. (#146025) + + With #145837, the `ReductionProcessor` component is now used by both + OpenMP and `do concurrent`. Therefore, this PR moves it to a shared + location: `flang/Lower/Support`. 
+ + PR stack: + - https://github.com/llvm/llvm-project/pull/145837 + - https://github.com/llvm/llvm-project/pull/146025 (this one) + - https://github.com/llvm/llvm-project/pull/146028 + - https://github.com/llvm/llvm-project/pull/146033 + +commit eba35cc1c0e4e2c59f9fd1f7a6f3b17cb4d8c765 +Author: Kareem Ergawy +Date: Fri Jul 11 06:39:30 2025 +0200 + + [flang][do concurrent] Re-model `reduce` to match reductions are modelled in OpenMP and OpenACC (#145837) + + This PR proposes re-modelling `reduce` specifiers to match OpenMP and + OpenACC. In particular, this PR includes the following: + + * A new `fir` op: `fir.declare_reduction` which is identical to OpenMP's + `omp.declare_reduction` op. + * Updating the `reduce` clause on `fir.do_concurrent.loop` to use the + new op. + * Re-uses the `ReductionProcessor` component to emit reductions for `do + concurrent` just like we do for OpenMP. To do this, the + `ReductionProcessor` had to be refactored to be more generalized. + * Updates mapping `do concurrent` to `fir.loop ... unordered` nests using + the new reduction model. + + Unfortunately, this is a big PR that would be difficult to divide up in + smaller parts because the bottom of the changes are the `fir` table-gen + changes to `do concurrent`. However, doing these MLIR changes cascades + to the other parts that have to be modified to not break things. + + This PR goes in the same direction we went for `private/local` + specifiers. Now the `do concurrent` and OpenMP (and OpenACC) dialects + are modelled in essentially the same way which makes mapping between + them more trivial, hopefully. 
+ + PR stack: + - https://github.com/llvm/llvm-project/pull/145837 (this one) + - https://github.com/llvm/llvm-project/pull/146025 + - https://github.com/llvm/llvm-project/pull/146028 + - https://github.com/llvm/llvm-project/pull/146033 + +commit c919221bbe56fae15b509fcc84c25b0c041eb6b5 +Author: Valentin Clement (バレンタイン クレメン) +Date: Thu Jul 10 20:52:55 2025 -0700 + + [flang][cuda][NFC] Remove TODO implemented in semantic (#148058) + +commit 9a0e03f430dec4634086fe8315c4c3b730bd7c66 +Author: Valentin Clement (バレンタイン クレメン) +Date: Thu Jul 10 09:50:31 2025 -0700 + + [flang][cuda] Update implicit data transfer for device component (#147882) + + Update the detection of implicit data transfer when a device resident + allocatable derived-type component is involved and remove the TODOs. + +commit c4138a24dc254783697f512f053e794fb1c68f88 +Author: Vijay Kandiah +Date: Wed Jul 9 15:47:11 2025 -0500 + + [mlir][acc][flang] Lower nested ACC loops with tile clause as collapsed loops (#147801) + + In the case of nested loops, `acc.loop` is meant to subsume all of the + loops that it applies to (when explicitly described as doing so in the + OpenACC specification). So when there is a `acc loop tile(...)` present + on nested Fortran DO loops, `acc.loop` should apply to the `n` loops + that `tile` applies to. This change lowers such nested Fortran loops + with tile clause into a collapsed `acc.loop` with `n` IVs, loop bounds, + and step, in a similar fashion to the current lowering for acc loops + with `collapse` clause. + +commit c9900015a9a0bc2ccadae5e24b63ddbfe4d508fd +Author: Shunsuke Watanabe +Date: Wed Jul 9 13:43:54 2025 +0900 + + [flang] Add -fcomplex-arithmetic= option and select complex division algorithm (#146641) + + This patch adds an option to select the method for computing complex + number division. 
It uses `LoweringOptions` to determine whether to lower + complex division to a runtime function call or to MLIR's `complex.div`, + and `CodeGenOptions` to select the computation algorithm for + `complex.div`. The available option values and their corresponding + algorithms are as follows: + - `full`: Lower to a runtime function call. (Default behavior) + - `improved`: Lower to `complex.div` and expand to Smith's algorithm. + - `basic`: Lower to `complex.div` and expand to the algebraic algorithm. + + See also the discussion in the following discourse post: + https://discourse.llvm.org/t/optimization-of-complex-number-division/83468 + + --------- + + Co-authored-by: Tarun Prabhu + +commit 46caad52ac14cefd6f9cf3188863818e330f3844 +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue Jul 8 10:52:15 2025 -0700 + + [flang][cuda] Do not produce data transfer in offloaded do concurrent (#147435) + + If a `do concurrent` loop is offloaded then there should be no CUDA data + transfer in it. Update the semantic and lowering to take that into + account. + + `AssignmentChecker` has to be put into a separate pass because the + checkers in `SemanticsVisitor` cannot have the same `Enter/Leave` + functions. The `DoForallChecker` already has `Enter/Leave` functions + for the `DoConstruct`. + +commit 9a8d45f6268112dce4950cd2f21628963546082f +Author: Jack Styles +Date: Tue Jul 8 18:28:58 2025 +0100 + + [Flang][OpenMP] Fix crash when block.end() is missed (#147519) + + As reported in #145917 and #147309, there are situations where flang + may crash. This is because `nextIt` in + `RewriteOpenMPLoopConstruct` gets re-assigned when an iterator is erased + from the block. If this is missed, Flang may attempt to access a + location in memory that is not accessible and cause a compiler crash. + + This adds protection where the crash can occur, and a test with a + reproducer that can trigger the crash. 
+ + Fixes #147309 + +commit 65cb0eae58d2b668869f3e8f10cb79eb2b8c55ac +Author: Jack Styles +Date: Tue Jul 1 08:39:15 2025 +0100 + + [Flang][OpenMP] Add Semantics support for Nested OpenMPLoopConstructs (#145917) + + In OpenMP Version 5.1, the tile and unroll directives were added. When + using these directives, it is possible to nest them within other OpenMP + Loop Constructs. This patch enables the semantics to allow for this + behaviour on these specific directives. Any nested loops will be stored + within the initial Loop Construct until reaching the DoConstruct itself. + + Relevant tests have been added, and previous behaviour has been retained + with no changes. + + See also, #110008 + +commit faefe7cf7daf585a781af151726d31981cee9e4f +Author: jeanPerier +Date: Mon Jun 30 09:58:00 2025 +0200 + + [flang] add option to generate runtime type info as external (#146071) + + Reland #145901 with a fix for shared library builds. + + So far flang generates runtime derived type info global definitions (as + opposed to declarations) for all the types used in the current + compilation unit even when the derived types are defined in other + compilation units. It is using linkonce_odr to achieve derived type + descriptor address "uniqueness" aspect needed to match two derived type + inside the runtime. + + This comes at a big compile time cost because of all the extra globals + and their definitions in apps with many and complex derived types. + + This patch adds and experimental option to only generate the rtti + definition for the types defined in the current compilation unit and to + only generate external declaration for the derived type descriptor + object of types defined elsewhere. + + Note that objects compiled with this option are not compatible with + object files compiled without because files compiled without it may drop + the rtti for type they defined if it is not used in the compilation unit + because of the linkonce_odr aspect. 
+ + I am adding the option so that we can better measure the extra cost of + the current approach on apps and allow speeding up some compilation + where devirtualization does not matter (and the build config links to + all module file object anyway). + +commit 344b5b7f9e5bb5c48ee3e9e380706038eaa89044 +Author: Krzysztof Parzyszek +Date: Sat Jun 28 13:38:00 2025 -0500 + + [flang][OpenMP] Move lowering of ATOMIC to separate file, NFC (#146225) + + Reinstate commits e5559ca4 and 925dbc79. Fix the issues with compilation + hangs by including DenseMapInfo specialization where the corresponding + instance of DenseMap was defined. + + Ref: https://github.com/llvm/llvm-project/pull/144960 + +commit dc6d2b841f22f3257721d9affba8edc4560f5e7a +Author: Krzysztof Parzyszek +Date: Fri Jun 27 09:44:16 2025 -0500 + + Revert "[flang][OpenMP] Move lowering of ATOMIC to separate file, NFC" (#146091) + + Reverts llvm/llvm-project#146067 + + This still causes timeouts, e.g. + + https://lab.llvm.org/buildbot/#/builders/207/builds/3023/steps/7/logs/stdio + +commit 302ed97b583f0529959b198366ffe892644007ba +Author: Krzysztof Parzyszek +Date: Fri Jun 27 08:19:16 2025 -0500 + + [flang][OpenMP] Move lowering of ATOMIC to separate file, NFC (#146067) + + Reinstate commits e5559ca4 and 925dbc79 with changes that avoid the + reported failures in Windows builds. + + Ref: https://github.com/llvm/llvm-project/pull/144960 + +commit 37e2d104994d36f848664660434b7f9d6e03ac2c +Author: jeanPerier +Date: Fri Jun 27 14:05:59 2025 +0200 + + Revert "[flang] add option to generate runtime type info as external" (#146064) + + Reverts llvm/llvm-project#145901 + + Broke shared library builds because of the usage of + `skipExternalRttiDefinition` in Lowering. + +commit 91f10df794d3293e18a56770acc1fd66fa0b7690 +Author: Akash Banerjee +Date: Fri Jun 27 13:05:22 2025 +0100 + + [Flang][OpenMP] Skip implicit mapping of named constants (#145966) + + Added early return when mapping named constants. 
+ + This prevents linking error in the following example: + + ``` + program test + use, intrinsic :: iso_c_binding, only: c_double + implicit none + + real(c_double) :: x + integer :: i + x = 0.0_c_double + !$omp target teams distribute parallel do reduction(+:x) + do i = 0, 9 + x = x + 1.0_c_double + end do + !$omp end target teams distribute parallel do + end program test + ``` + +commit e816817bbb2889a42d8d984736971635d77816f3 +Author: jeanPerier +Date: Fri Jun 27 13:00:29 2025 +0200 + + [flang] add option to generate runtime type info as external (#145901) + + So far flang generates runtime derived type info global definitions (as + opposed to declarations) for all the types used in the current + compilation unit even when the derived types are defined in other + compilation units. It is using linkonce_odr to achieve derived type + descriptor address "uniqueness" aspect needed to match two derived type + inside the runtime. + + This comes at a big compile time cost because of all the extra globals + and their definitions in apps with many and complex derived types. + + This patch adds and experimental option to only generate the rtti + definition for the types defined in the current compilation unit and to + only generate external declaration for the derived type descriptor + object of types defined elsewhere. + + Note that objects compiled with this option are not compatible with + object files compiled without because files compiled without it may drop + the rtti for type they defined if it is not used in the compilation unit + because of the linkonce_odr aspect. + + I am adding the option so that we can better measure the extra cost of + the current approach on apps and allow speeding up some compilation + where devirtualization does not matter (and the build config links to + all module file object anyway). 
+ +commit 938cdb30f16f4fefc4c7177d7a47fb571a297c43 +Author: Kazu Hirata +Date: Thu Jun 26 12:41:49 2025 -0700 + + [flang] Migrate away from std::nullopt (NFC) (#145928) + + ArrayRef has a constructor that accepts std::nullopt. This + constructor dates back to the days when we still had llvm::Optional. + + Since the use of std::nullopt outside the context of std::optional is + kind of abuse and not intuitive to newcomers, I would like to move + away from the constructor and eventually remove it. + + This patch replaces std::nullopt with {}. There are a couple of + places where std::nullopt is replaced with TypeRange() to accommodate + perfect forwarding. + +commit cfdc4c4a5b671646cb08aeab106103cc7006cb89 +Author: Muhammad Omair Javaid +Date: Thu Jun 26 18:32:20 2025 +0500 + + Revert "[flang][OpenMP] Move lowering of ATOMIC to separate file, NFC (#144960)" + + PR#144960 broke check-flang tests on Windows (x64/ARM64). + + This reverts commit e5559ca45f211f2cdd9c81e46935afe1cc2e22ab. + +commit d0469d1d3c31e919dba97637ac7ab063c44118e5 +Author: Jacques Pienaar +Date: Wed Jun 25 10:58:44 2025 +0200 + + [mlir] Move WalkResult to Support (#145649) + + This also enables moving StateStack, both are relatively generic helper + structs not tied to IR. + +commit 77af8bff97a0b20dac9ff9a95385d036da7d8ba5 +Author: Lance Wang +Date: Tue Jun 24 21:00:13 2025 -0700 + + [mlir]Moves the StateStack to IR folder from Support folder. (#145598) + + [MLIR] Fix circular dependency introduced in + https://github.com/llvm/llvm-project/pull/144897. This PR is to break + the dependency 
by moving StateStack to IR folder + + This commit resolves a circular dependency issue between mlir/Support + and mlir/IR: + + - Move StateStack.h and StateStack.cpp from Support to IR folder + - Update CMakeLists.txt files to reflect the new locations + - Update Bazel BUILD file to maintain correct dependencies + - Update includes in affected files (flang, Target/LLVMIR) + + The circular dependency was caused by StateStack.h depending on + IR/Visitors.h + while other IR files depended on Support. Moving StateStack to IR + eliminates + this cycle while maintaining proper separation of concerns. + +commit 8f7f48a97ea53161e046eeb52a8020f228d79a00 +Author: Tom Eccles +Date: Tue Jun 24 18:30:37 2025 +0100 + + [flang][OpenMP][NFC] remove globals with mlir::StateStack (#144898) + + Idea suggested by @skatrak + +commit e970f59e6b20dddc4369735affb79ca9be240c1c +Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> +Date: Fri Jun 20 17:23:00 2025 +0530 + + [flang][OpenMP] Reintroduce TODO for FIR lowering of linear clause (#144883) + + Current design of the linear clause lowering and translation shifts all + responsibility for handling the clause (like privatisation, linear + stepping, finalisation, and emission of synchronisation barriers) to the + IRBuilder. However, in certain corner cases (like associated loops in or + before OpenMP version 4.5), variables are implicitly linear. This + currently causes a problem with the existing linear clause + implementation. Hence, re-introduce TODO on the linear clause until the + linear clause lowering/translation are robust enough to handle such + cases as well. 
+ + Fixes https://github.com/llvm/llvm-project/issues/142935 + +commit e5559ca45f211f2cdd9c81e46935afe1cc2e22ab +Author: Krzysztof Parzyszek +Date: Fri Jun 20 06:44:14 2025 -0500 + + [flang][OpenMP] Move lowering of ATOMIC to separate file, NFC (#144960) + +commit 9fd22cb56d4c626769afd938e0f9ef6157164394 +Author: Peter Klausler +Date: Thu Jun 19 13:42:46 2025 -0700 + + [flang][NFC] Move new code to right place (#144551) + + Some new code was added to flang/Semantics that only depends on + facilities in flang/Evaluate. Move it into Evaluate and clean up some + minor stylistic problems. + +commit 89efae916a5de0387710b7dc06938423817e1503 +Author: Jack Styles +Date: Thu Jun 19 15:32:27 2025 +0100 + + [Flang][OpenMP] Update default MapType for Map Clauses and OpenMP 5.2 (#144715) + + In OpenMP 5.2, the `target enter data` and `target exit data` constructs + now have default map types if the user does not define them in the Map + clause. For `target enter data`, this is `to` and for `target exit data` + this is `from`. This behaviour is now enabled when OpenMP 5.2 or greater + is used when compiling. To enable this, the default value is now set in + the `processMap` clause, with any previous behaviour being maintained + for either older versions of OpenMP or other directives. + + See also #110008 + +commit 936c5566db013225dc098ff961395bb19e1bf2a4 +Author: Krzysztof Parzyszek +Date: Thu Jun 19 07:18:21 2025 -0500 + + [flang][OpenMP] Handle REQUIRES ADMO in lowering (#144362) + + The previous approach rewrote the atomic constructs in the AST based on + the REQUIRES ATOMIC_DEFAULT_MEM_ORDER directives. The new approach + checks for incorrect uses of REQUIRES ADMO in the semantic analysis, and + applies it in lowering, eliminating the need for a separate + tree-rewriting procedure. 
+ +commit 97e17e15957bf6f03923ca46301b32cad507f34b +Author: Kareem Ergawy +Date: Tue Jun 17 11:34:05 2025 +0200 + + Revert "[flang] Enable delayed localization by default for `do concurrent` (#144074)" (#144476) + + This reverts commit b5dbf8210a57b986b9802304745f4c5c108cf37b. + + Reverting again due to gfortran failure: + https://lab.llvm.org/buildbot/#/builders/17/builds/8868 + +commit 2dc58e02cbce83784a38b4cc33f83529ad1a7c7e +Author: Kareem Ergawy +Date: Tue Jun 17 07:01:53 2025 +0200 + + [flang][OpenMP] Add symbol table scopes for `teams` and `parallel` (#144015) + + Adds symbol map scopes for standalone `teams` and `parallel` constructs. + This is required to properly bind the privatized symbols in both + constructs so that nested constructs can find them. + + Resolves https://github.com/llvm/llvm-project/issues/116428. + +commit b5dbf8210a57b986b9802304745f4c5c108cf37b +Author: Kareem Ergawy +Date: Tue Jun 17 06:08:38 2025 +0200 + + [flang] Enable delayed localization by default for `do concurrent` (#144074) + + Reintroduces changes from + https://github.com/llvm/llvm-project/issues/143897. The + reported problem in https://github.com/llvm/llvm-project/issues/143897 + is hopefully resolved in + https://github.com/llvm/llvm-project/pull/144027. + + This PR aims to make it easier and more self-contained to revert the + switch/flag if we discover any problems with enabling it by default. + +commit 7caeec599998bd8aa01d498574e148e4e9c982db +Author: Kareem Ergawy +Date: Tue Jun 17 06:08:15 2025 +0200 + + [NFC][flang][OpenMP] Unify `genSectionsOp`'s prototype to match other `genXXXOp` functions (#144013) + + Unifies the prototype of `genSectionsOp` to match other ops generators. + Doing so, we are able to call `genSectionsOp` directly from + `genOMPDispatch` instead of the special handling needed now to pass the + section blocks. This is useful because now we can handle symbol mapping + scopes more easily for nested OpenMP directives. 
See + + https://github.com/llvm/llvm-project/pull/143706#issuecomment-2965344723 + and the following discussion for more info. + +commit f12b1ed11672bc40a53fb1180541b2fda6e7d9fc +Author: Kajetan Puchalski +Date: Thu Jun 12 16:35:36 2025 +0100 + + [flang][OpenMP] Add TODOs for target [teams|parallel] private (#143706) + + Using the private clause on `target teams` or `target parallel` is not + currently implemented and causes crashes during lowering. Add + appropriate TODOs. + + Resolves https://github.com/llvm/llvm-project/issues/116428. + + Signed-off-by: Kajetan Puchalski + +commit 4bd0a0e50bcfc3263c219acc9709ae234a334456 +Author: Kareem Ergawy +Date: Thu Jun 12 17:09:55 2025 +0200 + + Revert "[flang] Enable delayed localization by default for `do concurrent` (#142567)" (#143905) + + This reverts commit 937be177528de156922c1b5f6cab08ba3009dbf2. + + Resolves https://github.com/llvm/llvm-project/issues/143897 until the + todo is properly handled. + +commit 91be47dccfa3480c152916838404d49107fde45c +Author: Kazu Hirata +Date: Wed Jun 11 08:53:54 2025 -0700 + + [flang] Fix warnings + + This patch fixes: + + flang/lib/Lower/OpenMP/OpenMP.cpp:3904:9: error: unused variable + 'action0' [-Werror,-Wunused-variable] + + flang/lib/Lower/OpenMP/OpenMP.cpp:3905:9: error: unused variable + 'action1' [-Werror,-Wunused-variable] + +commit 141d390dcb6cd174b07ca663e58f37ab24eee08a +Author: Krzysztof Parzyszek +Date: Wed Jun 11 10:05:34 2025 -0500 + + [flang][OpenMP] Overhaul implementation of ATOMIC construct (#137852) + + The parser will accept a wide variety of illegal attempts at forming an + ATOMIC construct, leaving it to the semantic analysis to diagnose any + issues. This consolidates the analysis into one place and allows us to + produce more informative diagnostics. + + The parser's outcome will be parser::OpenMPAtomicConstruct object + holding the directive, parser::Body, and an optional end-directive. 
The + prior variety of OmpAtomicXyz classes, as well as OmpAtomicClause have + been removed. READ, WRITE, etc. are now proper clauses. + + The semantic analysis consistently operates on "evaluation" + representations, mainly evaluate::Expr (as SomeExpr) and + evaluate::Assignment. The results of the semantic analysis are stored in + a mutable member of the OpenMPAtomicConstruct node. This follows a + precedent of having `typedExpr` member in parser::Expr, for example. + This allows the lowering code to avoid duplicated handling of AST nodes. + + Using a BLOCK construct containing multiple statements for an ATOMIC + construct that requires multiple statements is now allowed. In fact, any + nesting of such BLOCK constructs is allowed. + + This implementation will parse, and perform semantic checks for both + conditional-update and conditional-update-capture, although no MLIR will + be generated for those. Instead, a TODO error will be issued prior to + lowering. + + The allowed forms of the ATOMIC construct were based on the OpenMP 6.0 + spec. + +commit 937be177528de156922c1b5f6cab08ba3009dbf2 +Author: Kareem Ergawy +Date: Wed Jun 11 10:10:22 2025 +0200 + + [flang] Enable delayed localization by default for `do concurrent` (#142567) + + This PR aims to make it easier and more self-contained to revert the + switch/flag if we discover any problems with enabling it by default. + +commit b994a4c04f38d8cfb13f3dbf3d99146cb778443e +Author: Peter Klausler +Date: Tue Jun 10 14:44:41 2025 -0700 + + [flang][NFC] Clean up code in two new functions (#142037) + + Two recently-added functions in Semantics/tools.h need some cleaning up + to conform to the coding style of the project. One of them should + actually be in Parser/tools.{h,cpp}, the other doesn't need to be + defined in the header.
+ +commit bac4aa440c12b2f90a1e12ab8aa6e3f842beb387 +Author: Kareem Ergawy +Date: Thu Jun 5 01:01:53 2025 +0200 + + [flang] Extend localization support for `do concurrent` (`init` regions) (#142564) + + Extends support for locality specifiers in `do concurrent` by supporting + data types that need `init` regions. + + This further unifies the paths taken by the compiler for OpenMP + privatization clauses and `do concurrent` locality specifiers. + +commit aac1f85393e74b643d08c948c3c2da156a231073 +Author: Leandro Lupori +Date: Tue Jun 3 10:58:23 2025 -0300 + + [flang][OpenMP] Explicitly set Shared DSA in symbols (#142154) + + Before this change, OmpShared was not always set in shared symbols. + Instead, absence of private flags was interpreted as shared DSA. + The problem was that symbols with no flags, with only a host + association, could also mean "has same DSA as in the enclosing + context". Now shared symbols behave the same as private and can be + treated the same way. + + Because of the host association symbols with no flags mentioned + above, it was also incorrect to simply test the flags of a given + symbol to find out if it was private or shared. The function + GetSymbolDSA() was added to fix this. It would be better to avoid + the need of these special symbols, but this would require changes + to how symbols are collected in lowering. + + Besides that, some semantic checks need to know if a DSA clause + was used or not. To avoid confusing implicit symbols with DSA + clauses a new flag was added: OmpExplicit. It is now set for all + symbols with explicitly determined data-sharing attributes. + + With the changes above, AddToContextObjectWithDSA() and the symbol + to DSA map could probably be removed and the DSA could be obtained + directly from the symbol, but this was not attempted. 
+ + Some debug messages were also added, with the "omp" DEBUG_TYPE, to + make it easier to debug the creation of implicit symbols and to + visualize all associations of a given symbol. + + Fixes #130533 + Fixes #140882 + +commit 99ae675fb7957f3eb8b65e9086dae4bbc722f221 +Author: Akash Banerjee +Date: Fri May 30 14:39:03 2025 +0100 + + [NFC][OpenMP] Move the default declare mapper name suffix to OMPConstants.h (#141964) + + This patch moves the default declare mapper name suffix + ".omp.default.mapper" to the OMPConstants.h file to be used everywhere + for lowering. + +commit f8dcb059ae06376b0991936026d5befb3d7b109b +Author: Kareem Ergawy +Date: Thu May 29 13:13:44 2025 +0200 + + [flang][fir][OpenMP] Refactor privtization code into shared location (#141767) + + Refactors the utils needed to create privatization/localization ops for + both the fir and OpenMP dialects into a shared location isolating OpenMP + stuff out of it as much as possible. + +commit 7e9887a50df2de9c666f5e7ceb46c27bfccc618f +Author: Kareem Ergawy +Date: Thu May 29 12:27:03 2025 +0200 + + [flang] Generlize names of delayed privatization CLI flags (#138816) + + Remove the `openmp` prefix from delayed privatization/localization flags + since they are now used for `do concurrent` as well. + + PR stack: + - https://github.com/llvm/llvm-project/pull/137928 + - https://github.com/llvm/llvm-project/pull/138505 + - https://github.com/llvm/llvm-project/pull/138506 + - https://github.com/llvm/llvm-project/pull/138512 + - https://github.com/llvm/llvm-project/pull/138534 + - https://github.com/llvm/llvm-project/pull/138816 (this PR) + +commit e33cd9690fe11305acd7df35532d712844b9049e +Author: Kareem Ergawy +Date: Thu May 29 11:04:27 2025 +0200 + + [flang][fir] Basic PFT to MLIR lowering for do concurrent locality specifiers (#138534) + + Extends support for `fir.do_concurrent` locality specifiers to the PFT + to MLIR level.
This adds code-gen for generating the newly added + `fir.local` ops and referencing these ops from `fir.do_concurrent.loop` + ops that have locality specifiers attached to them. This reuses the + `DataSharingProcessor` component and generalizes it a bit more to allow + for handling `omp.private` ops and `fir.local` ops as well. + + + PR stack: + - https://github.com/llvm/llvm-project/pull/137928 + - https://github.com/llvm/llvm-project/pull/138505 + - https://github.com/llvm/llvm-project/pull/138506 + - https://github.com/llvm/llvm-project/pull/138512 + - https://github.com/llvm/llvm-project/pull/138534 (this PR) + - https://github.com/llvm/llvm-project/pull/138816 + +commit 59b7b5b6b5c032ed21049d631eb5d67091f3a21c +Author: Akash Banerjee +Date: Wed May 28 14:32:17 2025 +0100 + + [OpenMP][Flang] Fix semantic check and scoping for declare mappers (#140560) + + The current semantic check in place is incorrect, this patch fixes this. + + Up to 1 **'default'** named mapper should be allowed for each derived + type. + The current semantic check only allows up to 1 **'default'** named + mapper across all derived types. + + This also makes sure that declare mappers follow proper scoping rules + for both default and named mappers. + + Co-authored-by: Raghu Maddhipatla + +commit 5530474e3e84edd02c85043c60e4df967fee7f26 +Author: Yang Zaizhou <91008302+Mxfg-incense@users.noreply.github.com> +Date: Fri May 23 20:15:10 2025 +0800 + + [Flang][OpenMP] fix crash on sematic error in atomic capture clause (#140710) + + Fix a crash caused by an invalid expression in the atomic capture + clause, due to the `checkForSymbolMatch` function not accounting for + `GetExpr` potentially returning null. 
+ + Fix https://github.com/llvm/llvm-project/issues/139884 + +commit 0baacd1a58420f7e4da14faa1f0e9a21d5294a6a +Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> +Date: Tue May 20 12:03:06 2025 +0530 + + [flang][OpenMP] Support MLIR lowering of linear clause for omp.wsloop (#139385) + + This patch adds support for MLIR lowering of linear clause on omp.wsloop + (except for linear modifiers). + +commit bbb7f0148177d332df80b5cfdc7d161dca289056 +Author: Asher Mancinelli +Date: Tue May 13 08:13:47 2025 -0700 + + [flang] Fix volatile attribute propagation on allocatables (#139183) + + Ensure volatility is reflected not just on the reference to an + allocatable, but on the box, too. When we declare a volatile + allocatable, we now get a volatile reference to a volatile box. + + Some related cleanups: + * SELECT TYPE constructs check the selector's type for volatility when + creating and designating the type used in the selecting block. + * Refine the verifier for fir.convert. In general, I think it is ok to + implicitly drop volatility in any ptr-to-int conversion because it means + we are in codegen (and representing volatility on the LLVM ops and + intrinsics) or we are calling an external function (are there any cases + I'm not thinking of?) + * An allocatable test that was XFAILed is now passing. Making + allocatables' boxes volatile resulted in accesses of those boxes being + volatile, which resolved some errors coming from the strict verifier. + * I noticed a runtime function was missing the fir.runtime attribute. + +commit 8a9e767fa690e0232db2cfa8576f53b48ae53f30 +Author: Pranav Bhandarkar +Date: Mon May 12 22:34:58 2025 -0500 + + [Flang][MLIR] - Handle the mapping of subroutine arguments when they are subsequently used inside the region of an `omp.target` Op (#134967) + + This is a fix for https://github.com/llvm/llvm-project/issues/134912 + which is a problem with mapping `fir.boxchar` type values to the + target i.e an `omp.target` op. 
+ + There really are two problems. Fixing the first exposed the second. The + first problem is that OpenMP lowering of maps in `omp.target` in Flang + cannot handle the mapping of a value that doesn't have a defining + operation. In other words, a value that is a block argument. This is handled + by mapping the value using a `MapInfoOp`. + The second problem this fixes is that it adds bounds to `omp.map.info` + ops that map `fir.char` types by extracting the length from the + corresponding `fir.boxchar`. + +commit 09b772e2efad804fdda02e2bd9ee44a2aaaddeeb +Author: Slava Zakharin +Date: Mon May 12 14:03:15 2025 -0700 + + [flang] Postpone hlfir.end_associate generation for calls. (#138786) + + If we generate hlfir.end_associate at the end of the statement, + we get easier optimizable HLFIR, because there are no compiler + generated operations with side-effects in between the call + and the consumers. This allows more hlfir.eval_in_mem to reuse + the LHS instead of allocating temporary buffer. + + I do not think the same can be done for hlfir.copy_out always, e.g.: + ``` + subroutine test2(x) + interface + function array_func2(x,y) + real:: x(*), array_func2(10), y + end function array_func2 + end interface + real :: x(:) + x = array_func2(x, 1.0) + end subroutine test2 + ``` + + If we postpone the copy-out until after the assignment, then + the result may be wrong. + +commit eef4b5a0cdf102e5035d6d4f1aa5f85b2b787e84 +Author: Zhen Wang <37195552+wangzpgi@users.noreply.github.com> +Date: Mon May 12 10:06:39 2025 -0700 + + [flang] [cuda] Fix CUDA implicit data transfer entity creation (#139414) + + Fixed an issue in `genCUDAImplicitDataTransfer` where creating an + `hlfir::Entity` from a symbol address could fail when the address comes + from a `hlfir.declare` operation. Fix is to check if the address comes + from a `hlfir.declare` operation. If so, use the base value from the + declare op when available. Falling back to the original address + otherwise.
+ +commit 939bb4e028499a3eda783567cda7d5331ba0c242 +Author: agozillon +Date: Mon May 12 10:49:26 2025 -0500 + + [NFC] Add const to newly added helper functions from PR #135226 + +commit f687ed9ff717372a7c751a3bf4ef7e33eb481fd6 +Author: agozillon +Date: Mon May 12 16:30:43 2025 +0200 + + [Flang][OpenMP] Initial defaultmap implementation (#135226) + + This aims to implement most of the initial arguments for defaultmap + aside from firstprivate and none, and some of the more recent OpenMP 6 + additions which will come in subsequent updates (with the OpenMP 6 + variants needing parsing/semantic support first). + +commit 4d9479fa8f4e949bc4c5768477cd36687c1c6b29 +Author: Andre Kuhlenschmidt +Date: Fri May 9 11:12:24 2025 -0700 + + [flang][openacc] Allow open acc routines from other modules. (#136012) + + OpenACC routine annotations in separate compilation units currently get + ignored, which leads to errors in compilation. There are two reasons for + currently ignoring OpenACC routine information and this PR is + addressing both. + - The module file reader doesn't read back in OpenACC directives from + module files. + - Simple fix in `flang/lib/Semantics/mod-file.cpp` + - The lowering to HLFIR doesn't generate routine directives for symbols + imported from other modules that are OpenACC routines. + - This is the majority of this diff, and is addressed by the changes that + start in `flang/lib/Lower/CallInterface.cpp`. + +commit 7aed77ef954f83cc52dad3eba4f51470e21b1cb0 +Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> +Date: Fri May 9 21:55:21 2025 +0530 + + [flang][OpenMP] Add implicit casts for omp.atomic.capture (#138163) + + This patch adds support for emitting implicit casts for atomic capture + if its constituent operations have different yet compatible types.
+ + Fixes: https://github.com/llvm/llvm-project/issues/138123 and + https://github.com/llvm/llvm-project/issues/94177 + +commit a68f35a17db03a6633a660d310156f4e2f17197f +Author: Krzysztof Parzyszek +Date: Fri May 9 07:42:15 2025 -0500 + + [flang][OpenMP] Pass OpenMP version to getOpenMPDirectiveName (#139131) + + The OpenMP version is stored in LangOptions in SemanticsContext. Use the + fallback version where SemanticsContext is unavailable (mostly in case + of debug dumps). + + RFC: + https://discourse.llvm.org/t/rfc-alternative-spellings-of-openmp-directives/85507 + + Reland with a fix for build break in f18-parse-demo. + +commit 89822ff5a8608570897c21a3c40fb450c53f603f +Author: Krzysztof Parzyszek +Date: Fri May 9 07:55:13 2025 -0500 + + Revert "[flang][OpenMP] Pass OpenMP version to getOpenMPDirectiveName (#139131)" + + This reverts commit 41aa67488c3ca33334ec79fb5216145c3644277c. + + Breaks build: https://lab.llvm.org/buildbot/#/builders/140/builds/22826 + +commit 41aa67488c3ca33334ec79fb5216145c3644277c +Author: Krzysztof Parzyszek +Date: Fri May 9 07:42:15 2025 -0500 + + [flang][OpenMP] Pass OpenMP version to getOpenMPDirectiveName (#139131) + + The OpenMP version is stored in LangOptions in SemanticsContext. Use the + fallback version where SemanticsContext is unavailable (mostly in case + of debug dumps). 
+ + RFC: + https://discourse.llvm.org/t/rfc-alternative-spellings-of-openmp-directives/85507 + +commit b291cfcad4815568dc1eaca58185d25dceed3f1c +Author: agozillon +Date: Fri May 9 13:57:45 2025 +0200 + + [Flang][OpenMP] Generate correct present checks for implicit maps of optional allocatables (#138210) + + Currently, we do not generate the appropriate checks to check if an + optional + allocatable argument is present before accessing relevant components of + it, + in particular when creating bounds, we must generate a presence check + and we + must make sure we do not generate/keep a load external to the presence + check + by utilising the raw address rather than the regular address of the info + data structure. + + Similarly in cases for optional allocatables we must treat them like + non-allocatable + arguments and generate an intermediate allocation that we can have as a + location + in memory that we can access later in the lowering without causing + segfaults when + we perform "mapping" on it, even if the end result is an empty + allocatable + (basically, we shouldn't explode if someone tries to map a non-present + optional, + similar to C++ when mapping null data). + +commit dd42112c82d7b12669513dca4048167664b211b2 +Author: Kaviya Rajendiran <67495422+kaviya2510@users.noreply.github.com> +Date: Fri May 9 17:26:28 2025 +0530 + + [Flang][OpenMP] MLIR lowering support for grainsize and num_tasks clause (#128490) + + - Added MLIR lowering for grainsize and num_tasks clauses of taskloop construct.
+ +commit 227e1ff73b6c0cbdd912c69405777f7121dc0760 +Author: Kareem Ergawy +Date: Thu May 8 21:42:52 2025 +0200 + + [flang][fir] Add locality specifiers modeling to `fir.do_concurrent.loop` (#138506) + +commit 2a32d738bb213a8a1e814b65beb61e39b7c66834 +Author: Tom Eccles +Date: Thu May 8 10:08:49 2025 +0100 + + [flang][OpenMP] fix predetermined privatization inside section (#138159) + + This now produces code equivalent to if there was an explicit private + clause on the SECTIONS construct. + + The problem was that each SECTION construct got its own DSP, which tried + to privatize the same symbol for that SECTION. Privatization for + SECTION(S) happens on the outer SECTION construct and so the outer + construct's DSP should be shared. + + Fixes #135108 + +commit 2fb288d4b8e0fb6c08a1a72b64cbf6a0752fdac7 +Author: Kareem Ergawy +Date: Wed May 7 12:52:25 2025 +0200 + + [flang][fir] Lower `do concurrent` loop nests to `fir.do_concurrent` (#137928) + + Adds support for lowering `do concurrent` nests from PFT to the new + `fir.do_concurrent` MLIR op as well as its special terminator + `fir.do_concurrent.loop` which models the actual loop nest. + + To that end, this PR emits the allocations for the iteration variables + within the block of the `fir.do_concurrent` op and creates a region for + the `fir.do_concurrent.loop` op that accepts arguments equal in number + to the number of the input `do concurrent` iteration ranges. 
+ + For example, given the following input: + ```fortran + do concurrent(i=1:10, j=11:20) + end do + ``` + the changes in this PR emit the following MLIR: + ```mlir + fir.do_concurrent { + %22 = fir.alloca i32 {bindc_name = "i"} + %23:2 = hlfir.declare %22 {uniq_name = "_QFsub1Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %24 = fir.alloca i32 {bindc_name = "j"} + %25:2 = hlfir.declare %24 {uniq_name = "_QFsub1Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) + fir.do_concurrent.loop (%arg1, %arg2) = (%18, %20) to (%19, %21) step (%c1, %c1_0) { + %26 = fir.convert %arg1 : (index) -> i32 + fir.store %26 to %23#0 : !fir.ref + %27 = fir.convert %arg2 : (index) -> i32 + fir.store %27 to %25#0 : !fir.ref + } + } + ``` + +commit 75e5643abf6b59db8dfae6b524e9c3c2ec0ffc29 +Author: Tom Eccles +Date: Wed May 7 10:18:13 2025 +0100 + + [flang][OpenMP] share global variable initialization code (#138672) + + Fixes #108136 + + In #108136 (the new testcase), flang was missing the length parameter + required for the variable length string when boxing the global variable. + The code that is initializing global variables for OpenMP did not + support types with length parameters. + + Instead of duplicating this initialization logic in OpenMP, I decided to + use the exact same initialization as is used in the base language + because this will already be well tested and will be updated for any new + types. The difference for OpenMP is that the global variables will be + zero initialized instead of left undefined. + + Previously `Fortran::lower::createGlobalInitialization` was used to + share a smaller amount of the logic with the base language lowering. I + think this bug has demonstrated that helper was too low level to be + helpful, and it was only used in OpenMP so I have made it static inside + of ConvertVariable.cpp. 
+ +commit a13c0b67708173b8033a53ff6ae4c46c5b80bb2b +Author: Kiran Chandramohan +Date: Wed May 7 09:56:45 2025 +0100 + + [Flang][OpenMP] Add frontend support for declare variant (#130578) + + Support is added for parsing. Basic semantics support is added to + forward the code to Lowering. Lowering will emit a TODO error. Detailed + semantics checks and lowering is further work. + +commit e1fed24034fee3f45bc17252ced5ee29ab6b5408 +Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> +Date: Wed May 7 11:14:48 2025 +0530 + + [flang][OpenMP] Fix fir.convert in omp.atomic.update region (#138397) + + Region generation in omp.atomic.update currently emits a direct + `fir.convert`. This crashes when the RHS expression involves complex + type but the LHS variable is primitive type (say `f32`), since a + `fir.convert` from `complex` to `f32` is emitted, which is illegal. + This PR adds a conditional check to emit an additional `ExtractValueOp` + in case RHS expression has a complex type. + + Fixes https://github.com/llvm/llvm-project/issues/138396 + +commit 9e7d529607ebde67af5b214a654de82cfa2ec8c4 +Author: Kaviya Rajendiran <67495422+kaviya2510@users.noreply.github.com> +Date: Wed May 7 10:25:56 2025 +0530 + + [Flang][OpenMP]Support for lowering task_reduction and in_reduction to MLIR (#111155) + + This patch, + - Added support for lowering of task_reduction to MLIR + - Added support for lowering of in_reduction to MLIR + - Fixed incorrect DSA handling for variables in the presence of 'in_reduction' clause. + +commit e356893551b315c84f30f7828eb493c4ef02e118 +Author: Kaviya Rajendiran <67495422+kaviya2510@users.noreply.github.com> +Date: Tue May 6 19:47:22 2025 +0530 + + [Flang][OpenMP] Support for lowering of taskloop construct to MLIR (#138646) + + Added support for lowering of taskloop construct and its clauses(Private + and Firstprivate) to MLIR. 
+ +commit c61746650178c117996e1787617f36ccda7233f7 +Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> +Date: Thu May 1 20:50:42 2025 +0530 + + [flang][llvm][OpenMP] Add implicit casts to omp.atomic (#131603) + + Currently, implicit casts in Fortran are handled by the OMPIRBuilder. + This patch shifts that responsibility to FIR codegen. + +commit 8836bce84208737f5807c396345a41e688d3ec11 +Author: Asher Mancinelli +Date: Wed Apr 30 08:46:33 2025 -0700 + + [flang] Add lowering of volatile references (#132486) + + [RFC on + discourse](https://discourse.llvm.org/t/rfc-volatile-representation-in-flang/85404/1) + + Flang currently lacks support for volatile variables. For some cases, + the compiler produces TODO error messages and others are ignored. Some + of our tests are like the example from _C.4 Clause 8 notes: The VOLATILE + attribute (8.5.20)_ and require volatile variables. + + Prior commits: + ``` + c9ec1bc753b0 [flang] Handle volatility in lowering and codegen (#135311) + e42f8609858f [flang][nfc] Support volatility in Fir ops (#134858) + b2711e1526f9 [flang][nfc] Support volatile on ref, box, and class types (#134386) + ``` + +commit 7dd8122d4ea147a2e8b90d611e30d4c2cff4619f +Author: Pranav Bhandarkar +Date: Tue Apr 29 14:53:15 2025 -0500 + + [Flang][MLIR][OpenMP] - Add support for firstprivate when translating omp.target ops from MLIR to LLVMIR (#131213) + + This patch adds support to translate `firstprivate` clauses on `omp.target` ops when translating from MLIR to LLVMIR. + Presently, this PR is restricted to supporting only included tasks, i.e `#omp target nowait firstprivate(some_variable)` will likely not work correctly even if it produces object code. 
+ +commit 9ea5254f77ae5d5fe8e81f8e39b5d461cc95e9dc +Author: Krzysztof Parzyszek +Date: Mon Apr 28 15:43:39 2025 -0500 + + [flang][OpenACC][OpenMP] Separate implementations of ATOMIC constructs (#137517) + + The OpenMP implementation of the ATOMIC construct will change in the + near future to accommodate atomic conditional-update and conditional- + update-capture operations. This patch separates the shared implemen- + tations to avoid interfering with OpenACC. + +commit 46e734746db7176f6e32b3c98beacf1e94fced37 +Author: Valentin Clement (バレンタイン クレメン) +Date: Mon Apr 21 19:22:07 2025 -0700 + + [flang][cuda] Update stream type for cuf kernel op (#136627) + + Update the type of the stream operand to be similar to KernelLaunchOp. + +commit 50db7a7d269b42f0cda63eb005aadfdbe25f56cb +Author: Slava Zakharin +Date: Fri Apr 18 17:19:12 2025 -0700 + + [flang] Fixed fir.dummy_scope generation to work for TBAA. (#136382) + + The nesting of fir.dummy_scope operations defines the roots + of the TBAA forest. If we do not generate fir.dummy_scope + in functions that do not have any dummy arguments, then + the globals accessed in the function and the dummy arguments + accessed by the callee may end up in different sub-trees + of the same root. The added tbaa-with-dummy-scope2.fir + demonstrates the issue. + +commit 30990c09c99bdcbfa7084d32b2b9851e19b6fb2a +Author: Kareem Ergawy +Date: Wed Apr 16 14:20:27 2025 +0200 + + Revert "[flang][fir] Lower `do concurrent` loop nests to `fir.do_concurrent` (#132904)" (#135904) + + This reverts commit 04b87e15e40f8857e29ade8321b8b67691545a50. + + The reasons for reverting are the following: + 1. I still need to upstream some part of the do concurrent to + OpenMP pass from our downstream implementation and taking this in + downstream will make things more difficult. + 2. I still need to work on a solution for modeling locality specifiers + on `hlfir.do_concurrent` ops.
I would prefer to do that and merge the + entire stack together instead of having a partial solution. + + After merging the revert I will reopen the original PR and keep it + updated against main until I finish the above. + +commit 04b87e15e40f8857e29ade8321b8b67691545a50 +Author: Kareem Ergawy +Date: Wed Apr 16 06:14:38 2025 +0200 + + [flang][fir] Lower `do concurrent` loop nests to `fir.do_concurrent` (#132904) + + Adds support for lowering `do concurrent` nests from PFT to the new + `fir.do_concurrent` MLIR op as well as its special terminator + `fir.do_concurrent.loop` which models the actual loop nest. + + To that end, this PR emits the allocations for the iteration variables + within the block of the `fir.do_concurrent` op and creates a region for + the `fir.do_concurrent.loop` op that accepts arguments equal in number + to the number of the input `do concurrent` iteration ranges. + + For example, given the following input: + ```fortran + do concurrent(i=1:10, j=11:20) + end do + ``` + the changes in this PR emit the following MLIR: + ```mlir + fir.do_concurrent { + %22 = fir.alloca i32 {bindc_name = "i"} + %23:2 = hlfir.declare %22 {uniq_name = "_QFsub1Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %24 = fir.alloca i32 {bindc_name = "j"} + %25:2 = hlfir.declare %24 {uniq_name = "_QFsub1Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) + fir.do_concurrent.loop (%arg1, %arg2) = (%18, %20) to (%19, %21) step (%c1, %c1_0) { + %26 = fir.convert %arg1 : (index) -> i32 + fir.store %26 to %23#0 : !fir.ref + %27 = fir.convert %arg2 : (index) -> i32 + fir.store %27 to %25#0 : !fir.ref + } + } + ``` + +commit 4c09ae0b2ed6a99e4e69ec9e0507c26cdcc301a9 +Author: Tom Eccles +Date: Tue Apr 8 10:29:18 2025 +0100 + + [flang][OpenMP] Lowering for CANCEL and CANCELLATIONPOINT (#134248) + + These will still hit TODOs in OpenMPToLLVMIRConversion.cpp + +commit 446d4f51eb1a172776e69ffb51b5972a0225c0a1 +Author: Tom Eccles +Date: Tue Apr 8 10:27:27 2025 +0100 + + [flang][OpenMP][Lower] fix
statement context cleanup insertion point (#133891) + + The statement context is used for lowering clauses for openmp operations + using generalised helpers from flang lowering. The statement context + stores closures which generate code for cleaning up temporary values + generated by the lowering helper. These closures are run when the + statement construct is destroyed. Keeping the statement context local to + the clause or operation being lowered without any special handling was + not correct because any cleanup code would be generated at the insertion + point when that statement context went out of scope (which would in + general be inside of the newly created container operation). It would be + better to generate the cleanup code after the newly created operation + (clause processing is synchronous even for deferred tasks). + + Currently supported clauses are mostly populated with simple scalar + values that require no cleanup. Even the simple array sections added by + #132994 needed no cleanup because indexing the right values of the array + did not create any temporaries. Supporting array sections with vector + indexing will generate hlfir.destroy operations for cleanup. This patch + fixes where those will be created. Those hlfir.destroy operations don't + generate any FIR (or LLVM) code, but the issue still exists + theoretically. + + I wasn't able to find any clauses which have any cleanup to use to test + this PR. It is probably NFC for the current lowering. This will be + tested in [the PR adding vector subscripting of array + sections](https://github.com/llvm/llvm-project/pull/133892). + +commit 8f0d8d28ccd8a1ced82a744679c5152f90e80c77 +Author: Zhen Wang <37195552+wangzpgi@users.noreply.github.com> +Date: Sun Apr 6 19:31:09 2025 -0700 + + Delete duplicated hlfir.declare op of induction variables of do concurrent when inside cuf kernel directive. 
(#134467) + + Delete duplicated creation of hlfir.declare op of do concurrent + induction variables when inside cuf kernel directive. + Obtain the correct hlfir.declare op generated from bindSymbol, and add + it to ivValues. + +commit 18dd299fb109792d0716156af0a2d8c0ca781c57 +Author: Sergio Afonso +Date: Thu Apr 3 15:06:19 2025 +0100 + + [Flang][MLIR][OpenMP] Host-evaluation of omp.loop bounds (#133908) + + This patch updates Flang lowering and kernel flags identification in + MLIR so that loop bounds on `target teams loop` constructs are evaluated + on the host, making the trip count available to the corresponding + `__tgt_target_kernel` call emitted for the target region. + + This is necessary in order to properly execute these constructs as + `target teams distribute parallel do`. + + Co-authored-by: Kareem Ergawy + +commit c309abd92553191c404f2dc13f637bcfd53033f9 +Author: Jean-Didier PAILLEUX +Date: Wed Apr 2 14:30:01 2025 +0200 + + [flang] Implement !DIR$ NOVECTOR and !DIR$ NOUNROLL[_AND_JAM] (#133885) + + Hi, + This patch implements support for the following directives : + - `!DIR$ NOUNROLL_AND_JAM` to disable unrolling and jamming on a DO + LOOP. + - `!DIR$ NOUNROLL` to disable unrolling on a DO LOOP. + - `!DIR$ NOVECTOR` to disable vectorization on a DO LOOP. + +commit e17d864f55133d46e12614280951ddb2dc43cc74 +Author: Tom Eccles +Date: Tue Apr 1 10:26:14 2025 +0100 + + [flang][OpenMP][Lower] lower array subscripts for task depend (#132994) + + The OpenMP standard says that all dependencies in the same set of + inter-dependent tasks must be non-overlapping. This simplification means + that the OpenMP only needs to keep track of the base addresses of + dependency variables. This can be seen in kmp_taskdeps.cpp, which stores + task dependency information in a hash table, using the base address as a + key. + + This patch generates a rebox operation to slice boxed arrays, but only + the box data address is used for the task dependency. 
The extra box is + optimized away by LLVM at O3. + + Vector subscripts are TODO (I will address in my next patch). + + This also fixes a bug for ordinary subscripts when the symbol was mapped + to a box: + + Fixes #132647 + +commit 091dcb8fc2b6ccb88c2975076e94f3cb6530db46 +Author: Thirumalai Shaktivel <74826228+Thirumalai-Shaktivel@users.noreply.github.com> +Date: Tue Apr 1 11:35:44 2025 +0530 + + [Flang] Make a private copy for the common block variables in copyin clause (#111359) + + Fixes: https://github.com/llvm/llvm-project/issues/82949 + +commit fe30cf18ab3eb1aba5ea7e44574e27fdde791c1d +Author: swatheesh-mcw +Date: Fri Mar 28 20:51:52 2025 +0530 + + Revert "Revert "[flang][openmp] Adds Parser and Semantic Support for Interop Construct, and Init and Use Clauses."" (#132343) + + Reverts llvm/llvm-project#132005 + +commit 123eb75cd43a5181c324efc033e978f0f1ed4598 +Author: Michael Kruse +Date: Fri Mar 21 12:32:54 2025 +0100 + + [Flang] Do not emit numeric_storage_size into object file (#131463) + + The value of numeric_storage_size depends on compilation options and + therefore its value is not yet known when building the builtins runtime. + Instead, the parameter is folding a __numeric_storage_size() expression + which is loaded into the user program. For the iso_fortran_env object + file, omit the symbol as it is never used. + + Similar tests that ensure that __numeric_storage_size() is not folded + until compiling the actual user program exist in FortranEvalutate: + + https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/flang/lib/Evaluate/check-expression.cpp#L487-L492 + + https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/flang/lib/Evaluate/fold-integer.cpp#L1457-L1460 + + Required for using CMake to compile the builtin module files. 
See RFC at + https://discourse.llvm.org/t/rfc-building-flangs-builtin-mod-files/84626 + +commit 68180d8d16f07db8200dfce7bae26a80c43ebc5e +Author: Krzysztof Parzyszek +Date: Thu Mar 20 06:50:43 2025 -0500 + + [flang][OpenMP] Use OmpDirectiveSpecification in standalone directives (#131163) + + This uses OmpDirectiveSpecification in the rest of the standalone + directives. + +commit ac9e4e9b3320b8dc63abfbdca8b7561e372ec8c7 +Author: Sergio Afonso +Date: Wed Mar 19 17:29:40 2025 +0000 + + [Flang][OpenMP] Simplify entry block creation for BlockArgOpenMPOpInterface ops, NFC (#132036) + + This patch adds the `OpWithBodyGenInfo::blockArgs` field and updates + `createBodyOfOp()` to prevent the need for `BlockArgOpenMPOpInterface` + operations to pass the same callback, minimizing chances of introducing + inconsistent behavior. + +commit cd26dd55959c11c1cd0ea4fe1f07e0fa9cb7a72a +Author: Krzysztof Parzyszek +Date: Wed Mar 19 11:34:40 2025 -0500 + + [flang][OpenMP] Use OmpDirectiveSpecification in simple directives (#131162) + + The `OmpDirectiveSpecification` contains directive name, the list of + arguments, and the list of clauses. It was introduced to store the + directive specification in METADIRECTIVE, and could be reused everywhere + a directive representation is needed. + In the long term this would unify the handling of common directive + properties, as well as creating actual constructs from METADIRECTIVE by + linking the contained directive specification with any associated user + code. + +commit 96b112fb613e216a198ae2e956a367742c838eed +Author: Kiran Chandramohan +Date: Wed Mar 19 11:13:52 2025 +0000 + + Revert "[flang][openmp] Adds Parser and Semantic Support for Interop Construct, and Init and Use Clauses." 
(#132005) + + Reverts llvm/llvm-project#120584 + + Reverting due to CI failure + https://lab.llvm.org/buildbot/#/builders/157/builds/22946 + +commit ee8a759bfb4772dea7459f4ecbd83bc2be5ee68b +Author: swatheesh-mcw +Date: Wed Mar 19 16:19:17 2025 +0530 + + [flang][openmp] Adds Parser and Semantic Support for Interop Construct, and Init and Use Clauses. (#120584) + + Adds Parser and Semantic Support for the below construct and clauses: + - Interop Construct + - Init Clause + - Use Clause + + Note: + The other clauses supported by Interop Construct such as Destroy, Use, + Depend and Device are added already. + +commit e7c6e3557b372afe6e78c025addfec276a10b49d +Author: Tom Eccles +Date: Wed Mar 19 10:12:52 2025 +0000 + + [flang][OpenMP] Fix threadprivate pointer variable in common block (#131888) + + Fixes #112538 + + The problem was that the host associated symbol for the threadprivate + variable doesn't have all of the symbol attributes (e.g. POINTER). This + caused the lowering code to generate the wrong type, eventually hitting + an assertion. + +commit cbc5c11feca0a65a7731de8d6eb14fddf2f233aa +Author: Akash Banerjee +Date: Tue Mar 18 13:17:10 2025 +0000 + + [MLIR][OpenMP] Add Lowering support for implicitly linking to default declare mappers (#131006) + +commit 83658ddb1b58fa10cf5f8ac8dfbe794b7a3701bc +Author: Kareem Ergawy +Date: Tue Mar 18 14:07:41 2025 +0100 + + [flang][OpenMP] Enable delayed privatization by default for `omp.distribute` (#131574) + + Switches delayed privatization for `omp.distribute` to be on by default: + controlled by the `-openmp-enable-delayed-privatization` instead of by + `-openmp-enable-delayed-privatization-staging`. 
+ + ### GFortran & Fujitsu test suite results: + + #### gfortran test-suite (this PR): + ``` + Testing Time: 34.51s + Passed: 6569 + ``` + + #### Fujitsu without changes (commit: 0813c5cf5f52): + ``` + Testing Time: 155.39s + Passed : 88325 + Failed : 156 + Executable Missing: 408 + ``` + + #### Fujitsu with changes (this PR): + ``` + Testing Time: 158.54s + Passed : 88325 + Failed : 156 + Executable Missing: 408 + ``` + +commit 3ff3b29dd62436e34d7e6551398c3a09fb590c07 +Author: jeanPerier +Date: Fri Mar 14 10:51:46 2025 +0100 + + [flang] lower remaining cases of pointer assignments inside forall (#130772) + + Implement handling of `NULL()` RHS, polymorphic pointers, as well as + lower bounds or bounds remapping in pointer assignment inside FORALL. + + These cases eventually do not require updating hlfir.region_assign, + lowering can simply prepare the new descriptor for the LHS inside the + RHS region. + + Looking more closely at the polymorphic cases, there is no need to call + the runtime, fir.rebox and fir.embox do handle the dynamic type setting + correctly. + + After this patch, the last remaining TODO is the allocatable assignment + inside FORALL, which like some cases here, is more likely an accidental + feature given FORALL was deprecated in F2003 at the same time as + allocatable components were added. + +commit f4fc2d731c1b351d5f684f7ec53a0e1ca549df43 +Author: Krzysztof Parzyszek +Date: Wed Mar 12 19:41:11 2025 -0500 + + [flang][OpenMP] Map ByRef if size/alignment exceed that of a pointer (#130832) + + Improve the check for whether a type can be passed by copy. Currently, + passing by copy is done via the OMP_MAP_LITERAL mapping, which can only + transfer as much data as can be contained in a pointer representation.
+ +commit 29f5d5bea92f937d4e2fea7fdd16036fff528adf +Author: Leandro Lupori +Date: Tue Mar 11 09:38:40 2025 -0300 + + [flang][OpenMP] Fix privatization of procedure pointers (#130336) + + Fixes #121720 + +commit d67947162f4b06172fac91fefa6a9ad25eb6dd56 +Author: Krzysztof Parzyszek +Date: Mon Mar 10 08:11:01 2025 -0500 + + [flang][OpenMP] Implement HAS_DEVICE_ADDR clause (#128568) + + The HAS_DEVICE_ADDR indicates that the object(s) listed exist at an + address that is a valid device address. Specifically, + `has_device_addr(x)` means that (in C/C++ terms) `&x` is a device + address. + + When entering a target region, `x` does not need to be allocated on the + device, or have its contents copied over (in the absence of additional + mapping clauses). Passing its address verbatim to the region for use is + sufficient, and is the intended goal of the clause. + + Some Fortran objects use descriptors in their in-memory representation. + If `x` had a descriptor, both the descriptor and the contents of `x` + would be located in the device memory. However, the descriptors are + managed by the compiler, and can be regenerated at various points as + needed. The address of the effective descriptor may change, hence it's + not safe to pass the address of the descriptor to the target region. + Instead, the descriptor itself is always copied, but for objects like + `x`, no further mapping takes place (as this keeps the storage pointer + in the descriptor unchanged). + + --------- + + Co-authored-by: Sergio Afonso + +commit 40e245a9aac02e0bbb6b44287bc13c80a68d37b3 +Author: jeanPerier +Date: Fri Mar 7 10:28:02 2025 +0100 + + [flang] add support for procedure pointer assignment inside FORALL (#130114) + + Very similar to object pointer assignment, the difference is the SSA + types of the LHS (!fir.ref<!fir.boxproc<()->()>>) and RHS + (!fir.boxproc<()->()>). + + The RHS must be saved as simple address, not descriptors (it is not + possible to make CFI descriptor out of procedure entity).
+ +commit 9543e9e9270e01f2c7311b571246c6ea105bcdb0 +Author: Kareem Ergawy +Date: Fri Mar 7 05:44:39 2025 +0100 + + [flang][OpenMP] Handle pre-detemined `lastprivate` for `simd` (#129507) + + This PR tries to fix `lastprivate` update issues in composite + constructs. In particular, pre-determined `lastprivate` symbols are + attached to the wrong leaf of the composite construct (the outermost + one). When using delayed privatization (should be the default mode in + the future), this results in trying to update the `lastprivate` symbol + in the wrong construct (outside the `omp.loop_nest` op). + + For example, given the following input: + ```fortran + !$omp target teams distribute parallel do simd collapse(2) private(y_max) + do i=x_min,x_max + do j=y_min,y_max + enddo + enddo + ``` + + Without the fixes introduced in this PR, the `DataSharingProcessor` + tries to generate the `lastprivate` update ops in the `parallel` op + since this is the op for which the DSP instance is created. + + The fix consists of 2 main parts: + 1. Instead of creating a single DSP instance, one instance is created + for the leaf constructs that might need privatization (whether for + explicit, implicit, or pre-determined symbols). + 2. When generating the `lastprivate` comparison ops, we don't directly + use the SSA values of the UBs and steps. Instead, we regenerated these + SSA values from the original loop bounds' expressions. We have to do + this to avoid using `host_eval` values in the `lastprivate` comparison + logic which is illegal. + +commit 478e5161406a781afc41e15bf942fb5df6672067 +Author: Valentin Clement (バレンタイン クレメン) +Date: Thu Mar 6 19:19:51 2025 -0800 + + [flang][cuda] Sync double descriptor after c_f_pointer call (#130194) + + After a global device pointer is set through `c_f_pointer`, we need to + sync the double descriptor so the version on the device is also up to + date. 
+ +commit e2911aa2c254c071c2ab898a69c69ba8c94586f8 +Author: Kiran Chandramohan +Date: Thu Mar 6 12:19:34 2025 +0000 + + [Flang][OpenMP] Fix crash when loop index var is pointer or allocatable (#129717) + + Use hlfir dereferencing for pointers and allocatables and use hlfir + assign. Also, change the code updating IV in lastprivate. + + Note: This is a small change. Modifications in existing tests are + changes from fir.store to hlfir.assign. + + Fixes #121290 + +commit d1abbb4dc5071e379d048f98b096260ed899ae44 +Author: Zhen Wang <37195552+wangzpgi@users.noreply.github.com> +Date: Wed Mar 5 14:50:42 2025 -0800 + + [flang][cuda] Change induction variable from i32 to index for doconcurrent inside cuf kernel directive (#129924) + + Use `index` instead of `i32` for induction variables for doconcurrent + inside cuf kernel directive. Regular do loop inside cuf kernel directive + also uses `index`: + ``` + cuf.kernel<<<*, *>>> (%arg0 : index) = ... + ``` + +commit 7302e1b94edb2de459a72b3e452d4f3be2d795eb +Author: jeanPerier +Date: Wed Mar 5 11:24:04 2025 +0100 + + [flang] implement simple pointer assignments inside FORALL (#129522) + + The semantic of pointer assignments inside FORALL requires evaluating + the targets (RHS) and pointer variables (LHS) of all iterations before + evaluating the assignments. + + In practice, if the compiler can prove that the RHS and LHS evaluations + are not impacted by the assignments, the evaluation of the FORALL + assignment statement can be done in a single loop. However, if the + compiler cannot prove this, it needs to "save" the addresses of the + targets and/or the pointer descriptors of each iterations before doing + the assignments. + + This patch implements the most common cases where there is no lower bound + spec, no bounds remapping, the LHS is not polymorphic, and the RHS is + not NULL. 
+ + The HLFIR operation used to represent assignments inside FORALL can be + used for pointer assignments too (the only difference being that the LHS + is a descriptor address). + + The analysis for intrinsic assignment can be reused, with the + distinction that the RHS data is not read during the assignment. + + The logic that is used to save LHS in intrinsic assignments inside + FORALL is extracted to be used for the RHS of pointer assignments when + needed (saving a descriptor value). + Pointer assignment LHS are just descriptor addresses and are saved as + int_ptr values. + +commit 9573c621147748e5ca07f545db0d995708c29435 +Author: Krzysztof Parzyszek +Date: Mon Mar 3 07:59:19 2025 -0600 + + [flang][OpenMP] Accept modern syntax of FLUSH construct (#128975) + + The syntax with the object list following the memory-order clause has + been removed in OpenMP 5.2. Still, accept that syntax with versions >= + 5.2, but treat it as deprecated (and emit a warning). + +commit d1fd3698a9b755250f622fd1b14c57a27e2a9d77 +Author: Valentin Clement (バレンタイン クレメン) +Date: Sun Mar 2 16:12:01 2025 -0800 + + [flang][cuda] Allow unsupported data transfer to be done on the host (#129160) + + Some data transfer marked as unsupported can actually be deferred to an + assignment on the host when the variables involved are unified or + managed. + +commit 24b7759a9dfe5714236957e7d829e2412100a4b7 +Author: Mats Petersson +Date: Tue Feb 25 17:36:25 2025 +0000 + + [FLANG][OpenMP]Add frontend support for ASSUME and ASSUMES (#120770) + + Enough support to parse correctly formed directives of !$OMP ASSUME and + !$OMP ASSUMES with the related clauses that go with them: ABSENT, + CONTAINS, NO_OPENMP, NO_OPENMP_ROUTINES, NO_PARALLELISM and HOLDS. + + Tests added for unparsing and dump parse-tree. + + Semantics support is very minimal and no specific tests added. + + The lowering will hit a TODO, and there are tests in Lower/OpenMP/Todo + to make it clear that this is currently expected behaviour.
+ + --------- + + Co-authored-by: Kiran Chandramohan + Co-authored-by: Krzysztof Parzyszek + +commit 25c19eb1178a26b09e8ee58c825d4ed0260b70da +Author: Sergio Afonso +Date: Tue Feb 25 10:35:21 2025 +0000 + + [Flang][OpenMP] Allow host evaluation of loop bounds for distribute (#127822) + + This patch adds `target teams distribute [simd]` and equivalent + construct nests to the list of cases where loop bounds can be evaluated + in the host, as they represent kernels for which the trip count must + also be evaluated in advance to the kernel call. + +commit a67566b185c56ce84f6b858e431e4d412b40fdaa +Author: Zhen Wang <37195552+wangzpgi@users.noreply.github.com> +Date: Thu Feb 20 14:05:44 2025 -0800 + + Allow do concurrent inside cuf kernel directive (#127693) + + Allow do concurrent inside cuf kernel directive to avoid the following + Lowering error: + ``` + void {anonymous}::FirConverter::genFIR(const Fortran::parser::CUFKernelDoConstruct&): Assertion `bounds && "Expected bounds on the loop construct"' failed. + ``` + + --------- + + Co-authored-by: Valentin Clement (バレンタイン クレメン) + +commit d6c6bde9dbcf332b5092ebcee8c7fe6fbb5aa2ae +Author: Jean-Didier PAILLEUX +Date: Wed Feb 19 16:00:09 2025 +0100 + + [flang] Implement !DIR$ UNROLL_AND_JAM [N] (#125046) + + This patch implements support for the UNROLL_AND_JAM directive to enable + or disable unrolling and jamming on a `DO LOOP`. + It must be placed immediately before a `DO LOOP` and applies only to the + loop that follows. N is an integer that specifying the unrolling factor. + This is done by adding an attribute to the branch into the loop in LLVM + to indicate that the loop should unrolled and jammed. + +commit 9905728e2fb4ebe9b7518dfd73a0574eea0a2083 +Author: Akash Banerjee +Date: Tue Feb 18 16:36:01 2025 +0000 + + [MLIR][OpenMP] Add Lowering support for OpenMP Declare Mapper directive (#117046) + + This patch adds HLFIR/FIR lowering support for OpenMP Declare Mapper + directive. + Depends on #117045. 
+ +commit 6b52fb25b90e575b507343bde0162d3d652ff666 +Author: Asher Mancinelli +Date: Mon Feb 10 08:21:22 2025 -0800 + + [flang] Correctly handle `!dir$ unroll` with unrolling factors of 0 and 1 (#126170) + + https://github.com/llvm/llvm-project/pull/123331 added support for the + unrolling directive. In the presence of an explicit unrolling factor, + that unrolling factor would be unconditionally passed into the metadata + even when it was 1 or 0. These special cases should instead disable + unrolling. Adding an explicit unrolling factor of 0 triggered this + assertion which is fixed by this patch: + + ``` + unsigned int unrollCountPragmaValue(const llvm::Loop*): + Assertion `Count >= 1 && "Unroll count must be positive."' failed. + ``` + + Updated tests and documentation. + +commit dcb124e820b2bf9dda60f66151591155a385580e +Author: Kareem Ergawy +Date: Thu Feb 6 19:11:04 2025 +0100 + + [flang][OpenMP] Enable delayed privatization by default `omp.wsloop` (#125732) + + Reapplies #122471 + + This is based on https://github.com/llvm/llvm-project/pull/125699, only + the latest commit is relevant. + + With changes in this PR and the parent one, the previously reported + failures in the Fujitsu(*) test suite should hopefully be resolved (I + verified all the 14 reported failures and they pass now). 
+ + (*) https://linaro.atlassian.net/browse/LLVM-1521 + +commit b815a3942a0b0a9e7aab6b269ffdb0e93abc4368 +Author: Michael Kruse +Date: Thu Feb 6 15:29:10 2025 +0100 + + [Flang] Move non-common headers to FortranSupport (#124416) + + Move non-common files from FortranCommon to FortranSupport (analogous to + LLVMSupport) such that + + * declarations and definitions that are only used by the Flang compiler, + but not by the runtime, are moved to FortranSupport + + * declarations and definitions that are used by both ("common"), the + compiler and the runtime, remain in FortranCommon + + * generic STL-like/ADT/utility classes and algorithms remain in + FortranCommon + + This allows for a cleaner separation between compiler and runtime + components, which are compiled differently. For instance, runtime + sources must not use STL's `<optional>` which causes problems with CUDA + support. Instead, the surrogate header `flang/Common/optional.h` must be + used. This PR fixes this for `fast-int-sel.h`. + + Declarations in include/Runtime are also used by both, but are + header-only. `ISO_Fortran_binding_wrapper.h`, a header used by compiler + and runtime, is also moved into FortranCommon. + +commit ccd92ec4c6ceb09e75ed40c96c1da7d03b9c45d5 +Author: Anchu Rajendran S +Date: Wed Feb 5 06:55:32 2025 -0800 + + [flang][openmp] Changes for invoking scan Op (#123254) + +commit 6fc66d322b00bdabc27fe8e14b27ab9bd53ba770 +Author: Leandro Lupori +Date: Tue Feb 4 10:28:14 2025 -0300 + + [flang][OpenMP] Fix sections lastprivate for common blocks (#125504) + + Common block handling was missing in sections' lastprivate lowering. + + Fixes #121719 + +commit 6dfe20dbbd65e2945350ed9a93eb383131c49511 +Author: Krzysztof Parzyszek +Date: Mon Feb 3 11:13:44 2025 -0600 + + [flang][OpenMP] Parse METADIRECTIVE in specification part (#123397) + + Add METADIRECTIVE to the OpenMP declarative constructs as well. Emit a + TODO error for both declarative and executable cases.
+ +commit 15ab7be2e049bc0f4ea6744ca037395686a923bc +Author: Krzysztof Parzyszek +Date: Wed Jan 29 15:07:20 2025 -0600 + + [flang][OpenMP] Parse WHEN, OTHERWISE, MATCH clauses plus METADIRECTIVE (#121817) + + Parse METADIRECTIVE as a standalone executable directive at the moment. + This will allow testing the parser code. + + There is no lowering, not even clause conversion yet. There is also no + verification of the allowed values for trait sets, trait properties. + +commit e811cb00e533e9737db689e35ee6cb0d5af536cc +Author: Jean-Didier PAILLEUX +Date: Wed Jan 29 09:44:09 2025 +0100 + + [flang] Implement !DIR$ UNROLL [N] (#123331) + + This patch implements support for the UNROLL directive to control how + many times a loop should be unrolled. + It must be placed immediately before a `DO LOOP` and applies only to the + loop that follows. N is an integer that specifying the unrolling factor. + This is done by adding an attribute to the branch into the loop in LLVM + to indicate that the loop should unrolled. + The code pushed to support the directive `VECTOR ALWAYS` has been + modified to take account of the fact that several directives can be used + before a `DO LOOP`. + +commit 654b76321a602db4d68734e9fd11efbb7d8eb617 +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue Jan 28 20:57:33 2025 -0800 + + [flang][cuda] Allow to set the stack limit size (#124859) + + This patch adds a call to the CUFInit function just after `ProgramStart` + when CUDA Fortran is enabled to initialize the CUDA context. This allows + us to set up some context information like the stack limit that can be + defined by an environment variable `ACC_OFFLOAD_STACKSIZE=`. + +commit 8035d38daab028b8da3cf2b01090b5f0ceacd695 +Author: Mats Petersson +Date: Sun Jan 26 09:44:04 2025 +0000 + + [Flang][OpenMP]Add parsing support for DISPATCH construct (#121982) + + This allows the Flang parser to accept the !$OMP DISPATCH and related + clauses. + + Lowering is currently not implemented. 
Tests for unparse and parse-tree + dump is provided, and one for checking that the lowering ends in a "not + yet implemented" + + --------- + + Co-authored-by: Kiran Chandramohan + +commit daa18205c6f0a3b5dd62ba2e65948e1a9182a60f +Author: Kaviya Rajendiran <67495422+kaviya2510@users.noreply.github.com> +Date: Thu Jan 23 11:14:00 2025 +0530 + + [Flang][OpenMP] Fix copyin allocatable lowering to MLIR (#122097) + + Fixes https://github.com/llvm/llvm-project/issues/113191 + + Issue: [flang][OpenMP] Runtime segfault when an allocatable variable is + used with copyin + + Rootcause: The value of the threadprivate variable is not being copied + from the primary thread to the other threads within a parallel region. + As a result it tries to access a null pointer inside a parallel region + which causes segfault. + + Fix: When allocatables used with copyin clause need to ensure that, on + entry to any parallel region each thread’s copy of a variable will + acquire the allocation status of the primary thread, before copying the + value of a threadprivate variable of the primary thread to the + threadprivate variable of each other member of the team. + +commit 937cbce14c9aa956342a9c818c26a8a557802843 +Author: Kareem Ergawy +Date: Wed Jan 22 10:16:40 2025 +0100 + + Revert "[flang][OpenMP] Enable delayed privatization by default `omp.wsloop` (#122471)" (#123324) + + This seems to have caused some regressions in Fujitsu's test-suite: + https://linaro.atlassian.net/browse/LLVM-1521 + + This reverts commit 6f82408bb53f57a859953d8f1114f1634a5d3ee9. 
+ +commit 662133a278f4f3553f061f7999759bae4e842820 +Author: jeanPerier +Date: Tue Jan 21 20:32:42 2025 +0100 + + [flang][OpenMP][OpenACC] remove libEvaluate dependency in passes (#123784) + + Move OpenACC/OpenMP helpers from Lower/DirectivesCommon.h that are also + used in OpenACC and OpenMP mlir passes into a new + Optimizer/Builder/DirectivesCommon.h so that parser and evaluate headers + are not included in Optimizer libraries (this both introduce + compile-time and link-time pointless overheads). + + This should fix https://github.com/llvm/llvm-project/issues/123377 + +commit c2aa11d148679b7d49cdff3819d5c8bdbd807777 +Author: Thirumalai Shaktivel <74826228+Thirumalai-Shaktivel@users.noreply.github.com> +Date: Tue Jan 21 09:10:25 2025 +0530 + + [Flang] Add LLVM lowering support for UNTIED clause in Task (#121052) + + Implementation details: + The UNTIED clause is recognized by setting the flag=0 for the default + case or performing logical OR to flag if other clauses are specified, + and this flag is passed as an argument to the `__kmpc_omp_task_alloc` + runtime call. + + + Resubmitting the PR with fix for the failure, as it was reverted here: + 927a70daf31b1610627f346b0dc140eda72144b9 + and previously merged here: https://github.com/llvm/llvm-project/pull/115283 + +commit a0406ce823e8f1c1993b565d08b045c0104c3a5a +Author: Kareem Ergawy +Date: Thu Jan 16 19:10:12 2025 +0100 + + [flang][OpenMP] Add `hostIsSource` paramemter to `copyHostAssociateVar` (#123162) + + This fixes a bug when the same variable is used in `firstprivate` and + `lastprivate` clauses on the same construct. The issue boils down to the + fact that `copyHostAssociateVar` was deciding the direction of the copy + assignment (i.e. the `lhs` and `rhs`) based on whether the + `copyAssignIP` + parameter is set. This is not the best way to do it since it is not + related to whether we doing a copy from host to localized copy or the + other way around. 
When we set the insertion for `firstprivate` in + delayed privatization, this resulted in switching the direction of the + copy assignment. Instead, this PR adds a new parameter to explicitly + tell + the function the direction of the assignment. + + This is a follow up PR for + https://github.com/llvm/llvm-project/pull/122471, only the latest commit + is relevant. + +commit 6f82408bb53f57a859953d8f1114f1634a5d3ee9 +Author: Kareem Ergawy +Date: Thu Jan 16 15:44:59 2025 +0100 + + [flang][OpenMP] Enable delayed privatization by default `omp.wsloop` (#122471) + + This enables delayed privatization by default for `omp.wsloop` ops, with + one caveat! I had to work around the "impure" alloc region issue that + is being resolved at the moment. The workaround detects whether the alloc + region's argument is used in the region and at the same time defined in + a block that does not dominate the chosen alloca insertion point. If so, + we move the alloca insertion point below the defining instruction of the + alloc region argument. This basically reverts to the + non-delayed-privatization behavior. + +commit 0d150817c354bc61a48676754288aabbb03570c3 +Author: Kazu Hirata +Date: Tue Jan 14 11:08:53 2025 -0800 + + [flang] Fix a warning + + This patch fixes: + + flang/lib/Lower/OpenMP/OpenMP.cpp:599:15: error: unused variable + 'ompEval' [-Werror,-Wunused-variable] + +commit 8fe11a26ae8f12622ddec83a7b80637080843a8b +Author: Sergio Afonso +Date: Tue Jan 14 13:55:17 2025 +0000 + + [Flang][OpenMP] Lowering of host-evaluated clauses (#116219) + + This patch adds support for lowering OpenMP clauses and expressions + attached to constructs nested inside of a target region that need to be + evaluated in the host device. This is done through the use of the + `OpenMP_HostEvalClause` `omp.target` set of operands and entry block + arguments.
+ + When lowering clauses for a target construct, a more involved + `processHostEvalClauses()` function is called, which looks at the + current and potentially other nested constructs in order to find and + lower clauses that need to be processed outside of the `omp.target` + operation under construction. This populates an instance of a global + structure with the resulting MLIR values. + + The resulting list of host-evaluated values is used to initialize the + `host_eval` operands when constructing the `omp.target` operation, and + then replaced with the corresponding block arguments after creating that + operation's region. + + Afterwards, while lowering nested operations, those that might + potentially be evaluated on the host (i.e. `num_teams`, `thread_limit`, + `num_threads` and `collapse`) check first whether there is an active + global host-evaluated information structure and whether it holds values + referring to these clauses. If that is the case, the stored values + (`omp.target` entry block arguments at that stage) are used instead of + lowering these clauses again. + +commit 82b9eb1086d45caf74ff3d5dfa519631c247eb14 +Author: Sergio Afonso +Date: Mon Jan 13 12:31:29 2025 +0000 + + [Flang][OpenMP] Support teams reductions lowering (#122683) + + This patch adds PFT to MLIR lowering of teams reductions. Since there is + still no MLIR to LLVM IR translation implemented, compilation of + programs including these constructs will still trigger + not-yet-implemented errors. + +commit 42da12063f49e8d52e63dcb36d25b55ed3688a26 +Author: Kareem Ergawy +Date: Sun Jan 12 07:46:58 2025 +0100 + + [flang][OpenMP] Extend delayed privatization for `omp.simd` (#122156) + + Adds support for delayed privatization for `simd` directives. This PR + includes PFT down to LLVM IR lowering. 
+ +commit d82d53b2e3d7fb2f44f91dc1ca9ce8bb5487da57 +Author: jeanPerier +Date: Tue Jan 7 10:04:27 2025 +0100 + + [flang][openmp] initialize allocatable components of firstprivate copies (#121808) + + Descriptors of allocatable components of firstprivate derived type + copies need to be set-up. Otherwise the program later die when + manipulating them inside OpenMP region. + +commit 9165848c8285884938583f5c3a35c97ec03ee486 +Author: Valentin Clement (バレンタイン クレメン) +Date: Fri Jan 3 14:37:14 2025 -0800 + + [flang][cuda] Sync global descriptor when nullifying pointer (#121595) + +commit 5137c209f0c19668d06e48cc4293e4c01a77c964 +Author: agozillon +Date: Fri Jan 3 16:46:15 2025 +0100 + + [Flang][OpenMP] Fix allocating arrays with size intrinisic (#119226) + + Attempt to address the following example from causing an assert or ICE: + + ``` + subroutine test(a) + implicit none + integer :: i + real(kind=real64), dimension(:) :: a + real(kind=real64), dimension(size(a, 1)) :: b + + !$omp target map(tofrom: b) + do i = 1, 10 + b(i) = i + end do + !$omp end target + end subroutine + ``` + + Where we utilise a Fortran intrinsic (size) to calculate the size of + allocatable arrays and then map it to device. + +commit adeff9f63a24f60b0bf240bf13e40bbf7c1dd0e8 +Author: Krzysztof Parzyszek +Date: Fri Jan 3 09:21:36 2025 -0600 + + [flang][OpenMP] Allow utility constructs in specification part (#121509) + + Allow utility constructs (error and nothing) to appear in the + specification part as well as the execution part. The exception is + "ERROR AT(EXECUTION)" which should only be in the execution part. + In case of ambiguity (the boundary between the specification and the + execution part), utility constructs will be parsed as belonging to the + specification part. In such cases move them to the execution part in the + OpenMP canonicalization code. 
+ +commit df859f90aab261918eee26382021e8455b532f7d +Author: Krzysztof Parzyszek +Date: Fri Jan 3 08:36:34 2025 -0600 + + [flang][OpenMP] Frontend support for NOTHING directive (#120606) + + Create OpenMPUtilityConstruct and put the two utility directives in it + (error and nothing). Rename OpenMPErrorConstruct to OmpErrorDirective. + +commit c870632ef6162fbdccaad8cd09420728220ad344 +Author: Matthias Springer +Date: Wed Dec 25 09:42:03 2024 +0100 + + [flang] Fix some memory leaks (#121050) + + This commit fixes some but not all memory leaks in Flang. There are + still 91 tests that fail with ASAN. + + - Use `mlir::OwningOpRef` instead of `std::unique_ptr`. The latter does + not free allocations of nested blocks. + - Pass `ModuleOp` as value instead of reference. + - Add few missing deallocations in test cases and other places. + +commit 927a70daf31b1610627f346b0dc140eda72144b9 +Author: Muhammad Omair Javaid +Date: Tue Dec 24 01:47:24 2024 +0500 + + Revert "[Flang OpenMP] Add LLVM translation support for UNTIED in Task (#115283)" + + This reverts commit 919aead1db64b2f1444842bc75a3af7836238671. + It breaks following LLVM bots: + https://lab.llvm.org/buildbot/#/builders/199 + https://lab.llvm.org/buildbot/#/builders/143 + https://lab.llvm.org/buildbot/#/builders/17 + +commit 919aead1db64b2f1444842bc75a3af7836238671 +Author: Thirumalai Shaktivel <74826228+Thirumalai-Shaktivel@users.noreply.github.com> +Date: Fri Dec 20 16:36:51 2024 +0530 + + [Flang OpenMP] Add LLVM translation support for UNTIED in Task (#115283) + + Implementation details: + The UNTIED clause is recognized by setting the flag=0 for the default + case or performing logical OR to flag if other clauses are specified, + and this flag is passed as an argument to the `__kmpc_omp_task_alloc` + runtime call. 
+ +commit 1fcb6a9754a8db057e18f629cb90011b638901e7 +Author: Leandro Lupori +Date: Thu Dec 19 17:26:50 2024 -0300 + + [flang][OpenMP] Initialize allocatable members of derived types (#120295) + + Allocatable members of privatized derived types must be allocated, + with the same bounds as the original object, whenever that member + is also allocated in it, but Flang was not performing such + initialization. + + The `Initialize` runtime function can't perform this task unless + its signature is changed to receive an additional parameter, the + original object, that is needed to find out which allocatable + members, with their bounds, must also be allocated in the clone. + As `Initialize` is used not only for privatization, sometimes this + other object won't even exist, so this new parameter would need + to be optional. + Because of this, it seemed better to add a new runtime function: + `InitializeClone`. + To avoid unnecessary calls, lowering inserts a call to it only for + privatized items that are derived types with allocatable members. + + Fixes https://github.com/llvm/llvm-project/issues/114888 + Fixes https://github.com/llvm/llvm-project/issues/114889 + +commit fc97d2e68b03bc2979395e84b645e5b3ba35aecd +Author: Peter Klausler +Date: Wed Dec 18 07:02:37 2024 -0800 + + [flang] Add UNSIGNED (#113504) + + Implement the UNSIGNED extension type and operations under control of a + language feature flag (-funsigned). + + This is nearly identical to the UNSIGNED feature that has been available + in Sun Fortran for years, and now implemented in GNU Fortran for + gfortran 15, and proposed for ISO standardization in J3/24-116.txt. + + See the new documentation for details; but in short, this is C's + unsigned type, with guaranteed modular arithmetic for +, -, and *, and + the related transformational intrinsic functions SUM & al. 
+ +commit e532241b021cd48bad303721757c1194bc844775 +Author: Kareem Ergawy +Date: Wed Dec 18 09:19:45 2024 +0100 + + Re-apply (#117867): [flang][OpenMP] Implicitly map allocatable record fields (#120374) + + This re-applies #117867 with a small fix that hopefully prevents build + bot failures. The fix is avoiding `dyn_cast` for the result of + `getOperation()`. Instead we can assign the result to `mlir::ModuleOp` + directly since the type of the operation is known statically (`OpT` in + `OperationPass`). + +commit dc936f3c199374056d3aaf3a0434b9efd807fc6c +Author: Kareem Ergawy +Date: Wed Dec 18 06:52:24 2024 +0100 + + Revert "[flang][OpenMP] Implicitly map allocatable record fields (#117867)" (#120360) + +commit db09014a0747931026e31f40c4f541d110a5298c +Author: Kareem Ergawy +Date: Wed Dec 18 05:37:58 2024 +0100 + + [flang][OpenMP] Implicitly map allocatable record fields (#117867) + + This is a starting PR to implicitly map allocatable record fields. + + This PR contains the following changes: + 1. Re-purposes some of the utils used in `Lower/OpenMP.cpp` so that + these utils work on the `mlir::Value` level rather than the + `semantics::Symbol` level. This takes one step towards enabling + MLIR passes to more easily do some lowering themselves (e.g. creating + `omp.map.bounds` ops for implicitly captured data like this PR + does). + 2. Adds support for implicitly capturing and mapping allocatable fields + in record types. + + There is quite some distance to still cover to have full support for + this. I added a number of todos to guide further development. + + Co-authored-by: Andrew Gozillon + + Co-authored-by: Andrew Gozillon + +commit 9d33874936d83b8ddf5d028d313d810214f00f20 +Author: Slava Zakharin +Date: Tue Dec 17 09:06:05 2024 -0800 + + [flang] Support -f[no-]realloc-lhs. (#120165) + + -frealloc-lhs is the default.
+ If -fno-realloc-lhs is specified, then an allocatable on the left + side of an intrinsic assignment is not implicitly (re)allocated + to conform with the right hand side. Fortran runtime will issue + an error if there is a mismatch in shape/type/allocation-status. + +commit 75e6d0eb4d6ad1b58e5eb5c4d25371e6062cee44 +Author: Mats Petersson +Date: Fri Dec 13 14:05:48 2024 +0000 + + [flang][OpenMP]Add support for OpenMP ERROR directive (#119582) + + Lowering leads to a TODO, with a test to confirm. + + Also testing unparse. + + --------- + + Co-authored-by: Krzysztof Parzyszek + +commit 7c9404c279cfa13e24a043e6357cc85bd12f55f1 +Author: Ivan R. Ivanov +Date: Fri Dec 13 21:44:43 2024 +0900 + + [flang][OpenMP] Add frontend support for ompx_bare clause (#111106) + +commit db9856b516a36c259fb17af422cd80d6ebc67406 +Author: Leandro Lupori +Date: Wed Dec 11 16:26:19 2024 -0300 + + [flang][OpenMP][NFC] Turn symTable into a reference (#119435) + + Convert `DataSharingProcessor::symTable` from pointer to reference. + This avoids accidental null pointer dereferences and makes it + possible to use `symTable` when delayed privatization is disabled. + +commit 0469bb91aa82b331052d314de53546548e6eb060 +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue Dec 10 09:48:15 2024 -0800 + + [flang][cuda] Fix lowering when step is a variable (#119421) + + Add missing conversion. + +commit edc50f3954af081b385cb03961899b5da1f1eb6b +Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> +Date: Tue Dec 10 16:55:06 2024 +0530 + + [flang][OpenMP] Add lowering support for task detach (#119128) + + This PR adds lowering task detach to MLIR. + +commit a88677edc0792534ba3157bf7d7a1b98e470f2fb +Author: Yusuke MINATO +Date: Tue Dec 10 16:26:53 2024 +0900 + + Reland "[flang] Integrate the option -flang-experimental-integer-overflow into -fno-wrapv" (#118933) + + This relands #110063. 
+ The performance issue on 503.bwaves_r is found not to be related to the + patch, and is resolved by fbd89bcc when LTO is enabled. + +commit c91ba04328e1ded6f284469a7828d181324d4e30 +Author: Michael Kruse +Date: Fri Dec 6 15:29:00 2024 +0100 + + [Flang][NFC] Split runtime headers in preparation for cross-compilation. (#112188) + + Split some headers into headers for public and private declarations in + preparation for #110217. Moving the runtime-private headers in + runtime-private include directory will occur in #110298. + + * Do not use `sizeof(Descriptor)` in the compiler. The size of the + descriptor is target-dependent while `sizeof(Descriptor)` is the size of + the Descriptor for the host platform which might be too small when + cross-compiling to a different platform. Another problem is that the + emitted assembly ((cross-)compiling to the same target) is not identical + between Flang's running on different systems. Moving the declaration of + `class Descriptor` out of the included header will also reduce the + amount of #included sources. + + * Do not use `sizeof(ArrayConstructorVector)` and + `alignof(ArrayConstructorVector)` in the compiler. Same reason as with + `Descriptor`. + + * Compute the descriptor's extra flags without instantiating a + Descriptor. `Fortran::runtime::Descriptor` is defined in the runtime + source, but not the compiler source. + + * Move `InquiryKeywordHashDecode` into runtime-private header. The + function is defined in the runtime sources and trying to call it in the + compiler would lead to a link-error. + + * Move allocator-kind magic numbers into common header. They are the + only declarations out of `allocator-registry.h` in the compiler as well. + + This does not make Flang cross-compile ready yet, the main goal is to + avoid transitive header dependencies from Flang to clang-rt. There are + more assumptions that host platform is the same as the target platform. 
+ +commit ff78cd5f3d6ae8e7084f0aff4df4164ff5a38af9 +Author: jeanPerier +Date: Thu Dec 5 14:09:48 2024 +0100 + + [flang] fix private pointers and default initialized variables (#118494) + + Both OpenMP privatization and DO CONCURRENT LOCAL lowering were incorrect + for pointers and derived type with default initialization. + + For pointers, the descriptor was not established with the rank/type + code/element size, leading to undefined behavior if any inquiry was made + to it prior to a pointer assignment (and if/when using the runtime for + pointer assignments, the descriptor must have been established). + + For derived type with default initialization, the copies were not + default initialized. + +commit 6003be7ef14bd95647e1ea6ec9685c1310f8ce58 +Author: vdonaldson <37090318+vdonaldson@users.noreply.github.com> +Date: Wed Dec 4 16:21:11 2024 -0500 + + [flang] IEEE_GET_UNDERFLOW_MODE, IEEE_SET_UNDERFLOW_MODE (#118551) + + Implement IEEE_GET_UNDERFLOW_MODE and IEEE_SET_UNDERFLOW_MODE. Update + IEEE_SUPPORT_UNDERFLOW_CONTROL to enable support for individual REAL + kinds. + +commit 03b5f8f0f0d10c412842ed04b90e2217cf071218 +Author: Mats Petersson +Date: Mon Dec 2 15:05:21 2024 +0000 + + [flang][OpenMP]Add parsing and semantics support for ATOMIC COMPARE (#117032) + + This adds a minimalistic implementation of parsing and semantics for the + ATOMIC COMPARE feature from OpenMP 5.1. + + There is no lowering, just a TODO for that part. Some of the Semantics + is also just a comment explaining that more is needed. + +commit 94488445cdd1657d1363a4994393b193c291b2cc +Author: Kareem Ergawy +Date: Mon Dec 2 15:01:09 2024 +0100 + + [flang][MLIR] Support delayed privatization for `wsloop` (PFT -> MLIR) (#118271) + + Adds PFT to MLIR lowering for delayed privatization of `omp.wsloop` ops. + Lowering to LLVM IR will be added in a later PR.
+ +commit 81f544d4659a96772c7e2ffed1bbe557993f4b34 +Author: Kareem Ergawy +Date: Thu Nov 28 05:15:06 2024 +0100 + + [flang][OpenMP] Rewrite `omp.loop` to semantically equivalent ops (#115443) + + Introduces a new conversion pass that rewrites `omp.loop` ops to their + semantically equivalent op nests based on the surrounding/binding + context of the `loop` op. Not all forms of `omp.loop` are supported yet. + See `isLoopConversionSupported` for more info on which forms are + supported. + +commit e573c6b67eb729a625431121139100bebc61ba1f +Author: Yusuke MINATO +Date: Thu Nov 28 08:58:09 2024 +0900 + + [flang] Add nsw to DO loop parameters (#113854) + + nsw is added to DO loop parameters (initial parameters, terminal + parameters, and incrementation parameters). + This can help vectorization in some cases like #110609. + + See also the discussion in + https://discourse.llvm.org/t/rfc-add-nsw-flags-to-arithmetic-integer-operations-using-the-option-fno-wrapv/77584/20. + +commit 89b31c9c32f2cd2c038fe2b12d9c66a53c779fc1 +Author: Kiran Chandramohan +Date: Wed Nov 27 14:20:34 2024 +0000 + + [Flang][OpenMP] Fix a crash for declare target in an interface (#117709) + + This is a point fix for the crash in #116426. Leaving the bug open to + further explore declare target issues for interfaces. + +commit 3433e4140d18865fe784061a3cd029c5980f4e2f +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue Nov 26 17:04:00 2024 -0800 + + [flang][cuda] Detect constant on the rhs of data transfer (#117806) + + When the rhs expression has some constants and a device symbol, an + implicit data transfer needs to be generated for the device symbol and + the computation with the constant is done on the host.
+ +commit b9e3a769b99e9dafa3e5205dbbef9fae8573e4e2 +Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> +Date: Tue Nov 26 16:10:26 2024 +0530 + + [flang][mlir][llvm][OpenMP] Add lowering and translation support for mergeable clause on task (#114662) + + Add FIR generation and LLVMIR translation support for mergeable clause + on task construct. If mergeable clause is present on a task, the + relevant flag in `ompt_task_flag_t` is set and passed to + `__kmpc_omp_task_alloc`. + +commit bb8bf858e865ec3119352bdef43c09adb4c93b31 +Author: jeanPerier +Date: Tue Nov 26 09:21:13 2024 +0100 + + [flang] add internal_assoc flag to mark variable captured in internal procedure (#117161) + + This patch adds a flag to mark hlfir.declare of host variables that are + captured in some internal procedure. + + It enables implementing a simple fir.call handling in + fir::AliasAnalysis::getModRef leveraging Fortran language specifications + and without a data flow analysis. + + This will allow implementing an optimization for "array = + array_function()" where array storage is passed directly into the hidden + result argument to "array_function" when it can be proven that + array_function does not reference "array". + + Captured host variables are very tricky because they may be accessed + indirectly in any calls if the internal procedure address was captured + via some global procedure pointer. Without flagging them, there is no + way around doing a complex inter procedural data flow analysis: + - checking that the call is not made to an internal procedure is not + enough because of the possibility of indirect calls made to internal + procedures inside the callee. + - checking that the current func.func has no internal procedure is not + enough because this would be invalid with inlining when a procedure + with internal procedures is inlined inside a procedure without internal + procedure. + +commit 7d6713db600af1b4381149a0e794cbce99ca6cb2 +Author: Ivan R.
Ivanov +Date: Tue Nov 19 16:58:30 2024 +0900 + + [flang][omp] Emit omp.workshare in frontend (#101444) + + Emit the contents of OpenMP workshare constructs in `omp.workshare`. + +commit 4c4a4134d5c0a0f9476b157862d378a7e571e9f0 +Author: Krzysztof Parzyszek +Date: Mon Nov 18 07:04:10 2024 -0600 + + [flang][OpenMP] Update frontend support for DEFAULTMAP clause (#116506) + + Add ALL variable category, implement semantic checks to verify the + validity of the clause, improve error messages, add testcases. + + The variable category modifier is optional since 5.0, make sure we allow + it to be missing. If it is missing, assume "all" in clause conversion. + +commit b4c0ef18226b7d1f82d71fc0171b99caec0d8d12 +Author: Kareem Ergawy +Date: Mon Nov 18 08:18:47 2024 +0100 + + [flang][OpenMP] Add MLIR lowering for `loop ... bind` (#114219) + + Extends MLIR lowering support for the `loop` directive by adding + lowering support for the `bind` clause. + + Parent PR: https://github.com/llvm/llvm-project/pull/114199, only the + latest commit is relevant to this PR. + +commit fd3ff2007ab30c74772572798f3e494fdaac7ac2 +Author: Kareem Ergawy +Date: Mon Nov 18 06:23:27 2024 +0100 + + [flang][OpenMP] Add basic support to lower `loop` directive to MLIR (#114199) + + Adds initial support for lowering the `loop` directive to MLIR. + + The PR includes basic support and testing for the following clauses: + * `collapse` + * `order` + * `private` + * `reduction` + + Parent PR: #113911, only the latest commit is relevant to this PR. + +commit e508bacce45d4fb2ba07d02c55391b858000c3b3 +Author: agozillon +Date: Sat Nov 16 12:28:37 2024 +0100 + + [Flang][OpenMP] Derived type explicit allocatable member mapping (#113557) + + This PR is one of 3 in a PR stack, this is the primary change set which + seeks to extend the current derived type explicit member mapping support + to handle descriptor member mapping at arbitrary levels of nesting.
The + PR stack seems to do this reasonably (from testing so far) but as you + can create quite complex mappings with derived types (in particular when + adding allocatable derived types or arrays of allocatable derived types) + I imagine there will be hiccups, which I am more than happy to address. + There will also be further extensions to this work to handle the + implicit auto-magical mapping of descriptor members in derived types and + a few other changes planned for the future (with some ideas on + optimizing things). + + The changes in this PR primarily occur in the OpenMP lowering and the + OMPMapInfoFinalization pass. + + In the OpenMP lowering several utility functions were added or extended + to support the generation of appropriate intermediate member mappings + which are currently required when the parent (or multiple parents) of a + mapped member are descriptor types. We need to map the entirety of these + types or do a "deep copy" for lack of a better term, where we map both + the base address and the descriptor as without the copying of both of + these we lack the information in the case of the descriptor to access + the member or attach the pointers data to the pointer and in the latter + case we require the base address to map the chunk of data. Currently we + do not segment descriptor based derived types as we do with regular + non-descriptor derived types, we effectively map their entirety in all + cases at the moment, I hope to address this at some point in the future + as it adds a fair bit of a performance penalty to having nestings of + allocatable derived types as an example. The process of mapping all + intermediate descriptor members in a members path only occurs if a + member has an allocatable or object parent in its symbol path or the + member itself is a member or allocatable. 
This occurs in the + createParentSymAndGenIntermediateMaps function, which will also generate + the appropriate address for the allocatable member within the derived + type to use as the varPtr field of the map (for intermediate + allocatable maps and final allocatable mappings). In this case it's + necessary as we can't utilise the usual Fortran::lower functionality + such as gatherDataOperandAddrAndBounds without causing issues later in + the lowering due to extra allocas being spawned which seem to affect the + pointer attachment (at least this is my current assumption, it results + in memory access errors on the device due to incorrect map information + generation). This is similar to why we do not use the MLIR value + generated for this and utilise the original symbol provided when mapping + descriptor types external to derived types. Hopefully this can be + rectified in the future so this function can be simplified and more + closely aligned to the other type mappings. We also make use of + fir::CoordinateOp as opposed to the HLFIR version as the HLFIR version + doesn't support the appropriate lowering to FIR necessary at the moment, + we also cannot use a single CoordinateOp (similarly to a single GEP) as + when we index through a descriptor operation (BoxType) we encounter + issues later in the lowering, however in either case we need access to + intermediate descriptors so individual CoordinateOp's aid this + (although, being able to compress them into a smaller amount of + CoordinateOp's may simplify the IR and perhaps result in a better end + product, something to consider for the future). + + The other large change area was in the OMPMapInfoFinalization pass, + where the pass had to be extended to support the expansion of box types + (or multiple nestings of box types) within derived types, or box type + derived types.
This requires expanding each BoxType mapping from one + into two maps and then modifying all of the existing member indices of + the overarching parent mapping to account for the addition of these new + members alongside adjusting the existing member indices to support the + addition of these new maps which extend the original member indices (as + a base address of a box type is currently considered a member of the box + type at a position of 0 as when lowered to LLVM-IR it's a pointer + contained at this position in the descriptor type, however, this means + extending mapped children of this expanded descriptor type to + additionally incorporate the new member index in the correct location in + its own index list). I believe there is a reasonable amount of comments + that should aid in understanding this better, alongside the test + alterations for the pass. + + A subset of the changes were also aimed at making some of the utilities + for packing and unpacking the DenseIntElementsAttr containing the member + indices shareable across the lowering and OMPMapInfoFinalization, this + required moving some functions to the Lower/Support/Utils.h header, and + transforming the lowering structure containing the member index data + into something more similar to the version used in + OMPMapInfoFinalization. There were also some other attempts at tidying + things up in relation to the member index data generation in the + lowering, some of which required creating a logical operator for the + OpenMP ID class so it can be utilised as a map key (it simply utilises + the symbol address for the moment as ordering isn't particularly + important). + + Otherwise I have added a set of new tests encompassing some of the + mappings currently supported by this PR (unfortunately as you can have + arbitrary nestings of all shapes and types it's not very feasible to + cover them all).
+ +commit e67e09a77ea1e4802c0f6bc0409c9f5e9d1fae9a +Author: Anchu Rajendran S +Date: Fri Nov 15 09:10:36 2024 -0800 + + [Flang][OpenMP][Sema] Adding parsing and semantic support for scan directive. (#102792) + +commit ff7fca7fa8646d73f884ab8a351e4178499c4d05 +Author: khaki3 <47756807+khaki3@users.noreply.github.com> +Date: Fri Nov 15 08:44:42 2024 -0800 + + [flang][cuda] Support memory cleanup at a return statement (#116304) + + We generate `cuf.free` and `func.return` twice if a return statement + exists at the end of program. + + ```f90 + program test + integer, device :: a(10) + return + end + ``` + + ``` + % flang -x cuda test.cuf -mmlir --mlir-print-ir-after-all + error: loc("/path/to/test.cuf":3:3): 'func.return' op must be the last operation in the parent block + // -----// IR Dump After Fortran::lower::VerifierPass Failed () //----- // + ``` + + Dumped IR: + ```mlir + "func.func"() <{function_type = () -> (), sym_name = "_QQmain"}> ({ + ... + "cuf.free"(%5#1) <{data_attr = #cuf.cuda}> : (!fir.ref>) -> () + "func.return"() : () -> () + "cuf.free"(%5#1) <{data_attr = #cuf.cuda}> : (!fir.ref>) -> () + "func.return"() : () -> () + } + ... + ``` + + The routine `genExitRoutine` in `Bridge.cpp` is guarded by + `blockIsUnterminated()` to make sure that `func.return` is generated + only at the end of a block. However, we redundantly run + `bridge.fctCtx().finalizeAndKeep()` before `genExitRoutine` in this + case, resulting in two pairs of `cuf.free` and `func.return`. This PR + fixes `Bridge.cpp` by using `blockIsUnterminated()` to guard + `finalizeAndKeep` as well. + +commit ec1e0c5ecd53e415b23d5bd40b8e44e3ef4b4d92 +Author: Mats Petersson +Date: Thu Nov 14 09:35:34 2024 +0000 + + [Flang][OMP]Add support for DECLARE MAPPER parsing and semantics (#115160) + + Will hit a TODO in the lowering, which there are tests added to check + for this happening. 
+ +commit 37143fe27e082b478d333ca28f6f1af5210b7c6b +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue Nov 12 16:49:44 2024 -0800 + + [flang][cuda] Make launch configuration optional for cuf kernel (#115947) + +commit 8f9dbb0a780feed60416ebc6ef8e89f4b0c2dca7 +Author: Tom Eccles +Date: Mon Nov 11 11:18:21 2024 +0000 + + [flang][OpenMP] delayed privatisation lowering for TASK (#113591) + +commit 90764582872bc4bd9613646b347b49c60ce2bc72 +Author: Sergio Afonso +Date: Mon Nov 4 10:32:48 2024 +0000 + + Revert "[Flang][OpenMP] Disable lowering of omp.simd reductions in co… (#113683) + + …mposites (#112686)" + + Lowering of reductions in composite operations can now be re-enabled, + since previous commits in this PR stack fix the MLIR representation + produced and it no longer triggers a compiler crash during translation + to LLVM IR. + + This reverts commit c44860c8d2582abd88794267b4fa0fa953bbef80. + +commit 6c28530ed082204a1b6d20b45482e81d4cd5ead4 +Author: Sergio Afonso +Date: Thu Oct 31 16:39:53 2024 +0000 + + [Flang][OpenMP] Properly bind arguments of composite operations (#113682) + + When composite constructs are lowered, clauses for each leaf construct + are lowered before creating the set of loop wrapper operations, using + these outside values to populate their operand lists. Then, when the + loop nest associated to that composite construct is lowered, the binding + of Fortran symbols to the entry block arguments defined by these loop + wrappers is performed, resulting in the creation of `hlfir.declare` + operations in the entry block of the `omp.loop_nest`. + + This approach prevents `hlfir.declare` operations related to the binding + and other operations resulting from the evaluation of the clauses from + being inserted between loop wrapper operations, which would be an + illegal MLIR representation. 
However, this introduces the problem of + entry block arguments defined by a wrapper that then should be used by + one of its nested wrappers, because the corresponding Fortran symbol + would still be mapped to an outside value at the time of gathering the + list of operands for the nested wrapper. + + This patch adds operand re-mapping logic to update wrappers without + changing when clauses are evaluated or where the `hlfir.declare` + creation is performed. + +commit 06984825061f1bf7c70087833a8d4f6d9feb2865 +Author: Kareem Ergawy +Date: Thu Oct 31 09:19:18 2024 +0100 + + [flang][MLIR] Hoist `do concurrent` nest bounds/steps outside the nest (#114020) + + If you have the following multi-range `do concurrent` loop: + + ```fortran + do concurrent(i=1:n, j=1:bar(n*m, n/m)) + a(i) = n + end do + ``` + + Currently, flang generates the following IR: + + ```mlir + fir.do_loop %arg1 = %42 to %44 step %c1 unordered { + ... + %53:3 = hlfir.associate %49 {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) + %54:3 = hlfir.associate %52 {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) + %55 = fir.call @_QFPbar(%53#1, %54#1) fastmath : (!fir.ref, !fir.ref) -> i32 + hlfir.end_associate %53#1, %53#2 : !fir.ref, i1 + hlfir.end_associate %54#1, %54#2 : !fir.ref, i1 + %56 = fir.convert %55 : (i32) -> index + ... + fir.do_loop %arg2 = %46 to %56 step %c1_4 unordered { + ... + } + } + ``` + + However, if `bar` is impure, then we have a direct violation of the + standard: + + ``` + C1143 A reference to an impure procedure shall not appear within a DO CONCURRENT construct. + ``` + + Moreover, the standard describes the execution of `do concurrent` + construct in multiple stages: + + ``` + 11.1.7.4 Execution of a DO construct + ... + 11.1.7.4.2 DO CONCURRENT loop control + The concurrent-limit and concurrent-step expressions in the concurrent-control-list are evaluated. ... + + 11.1.7.4.3 The execution cycle + ... 
+ The block of a DO CONCURRENT construct is executed for every active combination of the index-name values. + Each execution of the block is an iteration. The executions may occur in any order. + ``` + + From the above 2 points, it seems to me that execution is divided in + multiple consecutive stages: 11.1.7.4.2 is the stage where we evaluate + all control expressions including the step and then 11.1.7.4.3 is the + stage to execute the block of the concurrent loop itself using the + combination of possible iteration values. + +commit c478aab684be007ac14e51565c0d4ae39293d208 +Author: Krzysztof Parzyszek +Date: Wed Oct 30 08:36:08 2024 -0500 + + [flang][OpenMP] Parser support for DEPOBJ plus DEPEND, DESTROY, UPDATE (#114074) + + Parse the DEPOBJ construct and the associated clauses, perform basic + semantic checks. + +commit 55e4e3ff653356a9079906e209099684723caa4c +Author: Sergio Afonso +Date: Wed Oct 30 12:07:47 2024 +0000 + + [Flang][OpenMP] Access full list of entry block syms and vars (NFC) (#113681) + + This patch adds methods to `EntryBlockArgs` to access the full list of + entry block argument-related symbols and variables, in their standard + order. This helps centralizing this logic in as few places as possible + to avoid future inconsistencies. + +commit bd6ab32e6eb642f2b0b15be8c7c2a668192f07d8 +Author: Yusuke MINATO +Date: Mon Oct 28 23:19:20 2024 +0900 + + Revert "[flang] Integrate the option -flang-experimental-integer-overflow into -fno-wrapv" (#113901) + + Reverts llvm/llvm-project#110063 due to the performance regression on + 503.bwaves_r in SPEC2017. + +commit 843c2fbe7f983c2a2059f753e4494f06fb645a9e +Author: Kiran Chandramohan +Date: Fri Oct 25 18:57:01 2024 +0100 + + Add parser+semantics support for scope construct (#113700) + + Test parsing, semantics and a couple of basic semantic checks for + block/worksharing constructs. + Add TODO message in lowering. 
+ +commit 96bb375f5cedcfcc5dcd96296ba54ff933b39d4d +Author: Yusuke MINATO +Date: Fri Oct 25 15:20:23 2024 +0900 + + [flang] Integrate the option -flang-experimental-integer-overflow into -fno-wrapv (#110063) + + nsw is now added to do-variable increment when -fno-wrapv is enabled as + GFortran seems to do. + That means the option introduced by #91579 isn't necessary any more. + + Note that the feature of -flang-experimental-integer-overflow is enabled + by default. + +commit ea3534b385a713639953fb5dfd287af87b52bead +Author: Krzysztof Parzyszek +Date: Thu Oct 24 05:54:35 2024 -0500 + + [flang][OpenMP] Parse AFFINITY clause, lowering not supported yet (#113485) + + Implement parsing of the AFFINITY clause on TASK construct, conversion + from the parser class to omp::Clause. + Lowering to HLFIR is unsupported, a TODO message is displayed. + +commit c44860c8d2582abd88794267b4fa0fa953bbef80 +Author: Sergio Afonso +Date: Mon Oct 21 14:32:21 2024 +0100 + + [Flang][OpenMP] Disable lowering of omp.simd reductions in composites (#112686) + + Currently, the `omp.simd` operation is ignored during MLIR to LLVM IR + translation when it takes part in a composite construct. One consequence + of this limitation is that any entry block arguments defined by that + operation will trigger a compiler crash if they are used anywhere, as + they are not bound to an LLVM IR value. + + A previous PR introducing support for the `reduction` clause resulted in + the creation and use of entry block arguments attached to the `omp.simd` + operation, causing compiler crashes on 'do simd reduction(...)' + constructs. + + This patch disables Flang lowering of simd reductions in 'do simd' + constructs to avoid triggering these errors while translation to LLVM IR + is still incomplete. 
+ +commit 15d85769f119061fbfcae6e9de43982b534ef724 +Author: Sergio Afonso +Date: Wed Oct 16 10:27:50 2024 +0100 + + [Flang][OpenMP] Support lowering of simd reductions (#112194) + + This patch enables lowering to MLIR of the reduction clause of `simd` + constructs. Lowering from MLIR to LLVM IR remains unimplemented, so at + that stage it will result in errors being emitted rather than silently + ignoring it as it is currently done. + + On composite `do simd` constructs, this lowering error will remain + untriggered, as the `omp.simd` operation in that case is currently + ignored. The MLIR representation, however, will now contain `reduction` + information. + +commit 0a17bdfc361400cb511368f2edfc68c0d11e1974 +Author: Sergio Afonso +Date: Tue Oct 15 11:28:39 2024 +0100 + + [MLIR][OpenMP] Remove terminators from loop wrappers (#112229) + + This patch simplifies the representation of OpenMP loop wrapper + operations by introducing the `NoTerminator` trait and updating + accordingly the verifier for the `LoopWrapperInterface`. + + Since loop wrappers are already limited to having exactly one region + containing exactly one block, and this block can only hold a single + `omp.loop_nest` or loop wrapper and an `omp.terminator` that does not + return any values, it makes sense to simplify the representation of loop + wrappers by removing the terminator. + + There is an extensive list of Lit tests that needed updating to remove + the `omp.terminator`s adding some noise to this patch, but actual + changes are limited to the definition of the `omp.wsloop`, `omp.simd`, + `omp.distribute` and `omp.taskloop` loop wrapper ops, Flang lowering for + those, `LoopWrapperInterface::verifyImpl()`, SCF to OpenMP conversion + and OpenMP dialect documentation. 
+ +commit 839344f025fb7eff529735873f327330618b2ebb +Author: Tarun Prabhu +Date: Mon Oct 14 08:44:24 2024 -0600 + + [clang][flang][mlir] Reapply "Support -frecord-command-line option (#102975)" + + The underlying issue was caused by a file included in two different + places which resulted in duplicate definition errors when linking + individual shared libraries. This was fixed in c3201ddaeac02a2c86a38b + [#109874]. + +commit 0163ac1f53abc0a0f6e5b7e56912c1dee67e7f32 +Author: Mats Petersson +Date: Fri Oct 11 12:23:37 2024 +0100 + + [Flang][OpenMP]Add tests for TODOs and small changes to improve messages (#111562) + + The bulk of this change are new tests to check that we get a "Not yet + implemented: *some stuff here*" message when using some not yet + supported OpenMP functionality. + + For some of these cases, this also means adding additional clauses to a + filter list in OpenMP.cpp - this changes nothing [to the best of my + understanding] other than allowing the clause to get to the point where + it can be rejected in a TODO with a more clear message. One of the TODO + filters was missing Mergeable clause, so this was also added and the + existing test updated for the new more specific error message. + + There is no functional change intended here. + +commit e71ac933716dc9a747b93b73e899e50b421ebcde +Author: Sergio Afonso +Date: Wed Oct 9 10:24:04 2024 +0100 + + [Flang][OpenMP] Properly reserve space for entry block argument lists (NFC) (#111529) + + This patch adds the size for `use_device_ptr`, which was missing. + +commit b124c04597166cc93ca791d0ad07834c85de824d +Author: Sergio Afonso +Date: Wed Oct 9 10:21:47 2024 +0100 + + [Flang][OpenMP] Remove omp.simd reduction block args (#111523) + + This patch reverts previous changes to create entry block arguments for + reduction variables attached to `simd` constructs.
+ + This can't currently be done because reduction variables stored in the + corresponding clause structure are not added to the `omp.simd` operation + when created, as this is not supported yet. Adding block arguments for + non-existent reduction variables results in some tests from the Fujitsu + compiler testsuite breaking: + https://linaro.atlassian.net/browse/LLVM-1389. + +commit 88478a89cd85adcc32f2a321ef9e9906c5fdbe26 +Author: Sergio Afonso +Date: Mon Oct 7 11:26:35 2024 +0100 + + [Flang][OpenMP] Improve entry block argument creation and binding (#110267) + + The main purpose of this patch is to centralize the logic for creating + MLIR operation entry blocks and for binding them to the corresponding + symbols. This minimizes the chances of mixing arguments up for + operations having multiple entry block argument-generating clauses and + prevents divergence while binding arguments. + + Some changes implemented to this end are: + - Split into two functions the creation of the entry block, and the + binding of its arguments and the corresponding Fortran symbol. This + enabled a significant simplification of the lowering of composite + constructs, where it's no longer necessary to manually ensure the lists + of arguments and symbols refer to the same variables in the same order + and also match the expected order by the `BlockArgOpenMPOpInterface`. + - Removed redundant and error-prone passing of types and locations from + `ClauseProcessor` methods. Instead, these are obtained from the values + in the appropriate clause operands structure. This also simplifies + argument lists of several lowering functions. + - Access block arguments of already created MLIR operations through the + `BlockArgOpenMPOpInterface` instead of directly indexing the argument + list of the operation, which is not scalable as more entry block + argument-generating clauses are added to an operation. 
+ - Simplified the implementation of `genParallelOp` to no longer need to + define different callbacks depending on whether delayed privatization is + enabled. + +commit 2f245875b2f71272e6d7a78b4aed5be81109e9b9 +Author: Kareem Ergawy +Date: Fri Oct 4 15:24:00 2024 +0200 + + [flang][OpenMP] Handle unstructured CF in compound loop constructs (#111111) + + Fixes a bug in handling unstructured control-flow in compound loop + constructs. The fix makes sure that unstructured CF does not get lowered + until we reach the last item of the compound construct. This way, we + avoid moving block of unstructured loops in-between the middle items of + the construct and messing (i.e. adding operations) to these block while + doing so. + +commit c4204c0b29a6721267b1bcbaeedd7b1118e42396 +Author: jeanPerier +Date: Thu Oct 3 17:10:57 2024 +0200 + + [flang] replace fir.complex usages with mlir complex (#110850) + + Core patch of + https://discourse.llvm.org/t/rfc-flang-replace-usages-of-fir-complex-by-mlir-complex-type/82292. + After that, the last step is to remove fir.complex from FIR types. + +commit f98244392b4e3d4075c03528dcec0b268ba13ab7 +Author: Krzysztof Parzyszek +Date: Wed Oct 2 15:36:45 2024 -0500 + + [flang][OpenMP] Parse lastprivate modifier, add TODO to lowering (#110568) + + Parse the lastprivate clause with a modifier. Codegen for it is not yet + implemented. + +commit d0f67773b213383b6e1c9331fb00f2d4c14bfcb2 +Author: Sergio Afonso +Date: Tue Oct 1 15:04:27 2024 +0100 + + [MLIR][OpenMP] Normalize handling of entry block arguments (#109808) + + This patch introduces a new MLIR interface for the OpenMP dialect aimed + at providing a uniform way of verifying and handling entry block + arguments defined by OpenMP clauses. + + The approach consists in defining a set of overrideable methods that + return the number of block arguments the operation holds regarding each + of the clauses that may define them. 
These by default return 0, but they + are overridden by the corresponding clause through the + `extraClassDeclaration` mechanism. + + Another set of interface methods to get the actual lists of block + arguments is defined, which is implemented based on the previously + described methods. These implicitly define a standardized ordering + between the list of block arguments associated to each clause, based on + the alphabetical ordering of their names. They should be the preferred + way of matching operation arguments and entry block arguments to that + operation's first region. + + Some updates are made to the printing/parsing of `omp.parallel` to + follow the expected order between `private` and `reduction` clauses, as + well as the MLIR to LLVM IR translation pass to access block arguments + using the new interface. Unit tests of operations impacted by additional + verification checks and sorting of entry block arguments. + +commit 497523b695d06c8bf9f3aaf5a5cb4414a5b0625b +Author: Kareem Ergawy +Date: Thu Sep 26 12:28:14 2024 +0200 + + [flang][OpenMP] Delayed privatization MLIR lowering support for `distribute` (#109632) + + Starts delayed privatization support for standalone `distribute` + directives. Other flavours of `distribute` are still TODO as well as + MLIR to LLVM IR lowering. + +commit 737c414e1d9578e5037e68e3b3f6ddea507f8243 +Author: David Spickett +Date: Fri Sep 20 11:19:12 2024 +0000 + + Revert "[clang][flang][mlir] Support -frecord-command-line option (#102975)" + + This reverts commit b3533a156da92262eb19429d8c12f53e87f5ccec. 
+ + It caused test failures in shared library builds: + https://lab.llvm.org/buildbot/#/builders/80/builds/3854 + +commit b3533a156da92262eb19429d8c12f53e87f5ccec +Author: Tarun Prabhu +Date: Thu Sep 19 18:28:50 2024 -0600 + + [clang][flang][mlir] Support -frecord-command-line option (#102975) + + Add support for the -frecord-command-line option that will produce the + llvm.commandline metadata which will eventually be saved in the object + file. This behavior is also supported in clang. Some refactoring of the + code in flang to handle these command line options was carried out. The + corresponding -grecord-command-line option which saves the command line + in the debug information has not yet been enabled for flang. + +commit 5aaf384b1614fcef5504d0b16d3e5063f72943c1 +Author: Tom Eccles +Date: Mon Sep 16 12:33:37 2024 +0100 + + [flang][NFC] use llvm.intr.stacksave/restore instead of opaque calls (#108562) + + The new LLVM stack save/restore intrinsic operations are more convenient + than function calls because they do not add function declarations to the + module and therefore do not block the parallelisation of passes. + Furthermore they could be much more easily marked with memory effects + than function calls if that ever proved useful. + + This builds on top of #107879. + + Resolves #108016 + +commit b54be00a29f8dabf9b0d9ec69373e859bc75ded4 +Author: Sergio Afonso +Date: Mon Sep 16 12:03:30 2024 +0100 + + [Flang][OpenMP] Process motion clauses in a single call (NFC) (#108046) + + This patch removes the template parameter of the + `ClauseProcessor::processMotionClauses()` method and instead processes + both `TO` and `FROM` as part of a single call. This also enables moving + the implementation out of the header and makes it simpler for a + follow-up patch to potentially refactor `processMap()`, + `processMotionClauses()`, `processUseDeviceAddr()` and + `processUseDevicePtr()`, and minimize code duplication among these. 
+ +commit 8e10a3f80e264aaa186ab3cc74fea840f453c66d +Author: Mats Petersson +Date: Fri Sep 13 12:57:11 2024 +0100 + + [flang][OpenMP] don't privatise loop index marked shared (#108176) + + Mark the symbol with OmpShared, and then check that later in lowering to + avoid making a local loop index. + + OpenMP 5.2 says: "Loop iteration variables of loops that are not associated + with any OpenMP directive may be listed in data-sharing attribute clauses on + the surrounding teams, parallel or task generating construct, and on enclosed + constructs, subject to other restrictions." + + Tests updated to match the extra OmpShared attribute. + + Add regression test for lowering to hlfir. + + Closes #102961 + + --------- + + Co-authored-by: Tom Eccles + +commit 70ef5eb6f087524dc952a8f5249b79f4a4000e04 +Author: harishch4 +Date: Fri Sep 13 10:11:56 2024 +0530 + + [Flang][OpenMP] Lowering nontemporal clause to MLIR for SIMD directive (#108339) + + Currently, Flang throws a "**not yet implemented: Unhandled clause + NONTEMPORAL in SIMD construct**" error when encountering nontemporal + clause. This patch adds support for this clause in SIMD construct. + +commit 53b59022b07317fa01bf8601d12915dce424baf0 +Author: David Truby +Date: Tue Sep 10 14:59:21 2024 +0100 + + [flang][OpenMP] Implement copyin for pointers and allocatables. (#107425) + + The copyin clause currently forbids pointer and allocatable variables, + which are allowed by the OpenMP 1.1 and 3.0 specifications respectively. + +commit 433ca3ebbef50002bec716ef2c6d6a82db71048d +Author: Sergio Afonso +Date: Tue Sep 10 11:09:25 2024 +0100 + + [Flang][Lower] Introduce SymMapScope helper class (NFC) (#107866) + + This patch creates a simple RAII wrapper class for `SymMap` to make it + easier to use and prevent a missing matching `popScope()` for a + `pushScope()` call on simple use cases. + + Some push-pop pairs are replaced with instances of the new class by this + patch. 
+ +commit 797f01198e8b41982916ba02d703bd6a96b5347e +Author: Leandro Lupori +Date: Thu Sep 5 14:55:01 2024 -0300 + + [flang][OpenMP] Make lastprivate work with reallocated variables (#106559) + + Fixes https://github.com/llvm/llvm-project/issues/100951 + +commit c81b43074ab010d01ad794224dd9dd22bbe8a1f7 +Author: Valentin Clement (バレンタイン クレメン) +Date: Wed Sep 4 08:43:13 2024 -0700 + + [flang][cuda] Fix lowering of cuf kernel with unstructured nested construct (#107149) + + Lowering was crashing when cuf kernels have an unstructured construct. + Blocks created by PFT need to be re-created inside of the operation like + it is done for OpenACC construct. + +commit 9ba41031de105d7babf3ae53facd368f2b4e409f +Author: Akash Banerjee +Date: Wed Sep 4 12:35:44 2024 +0100 + + [OpenMP]Update use_device_clause lowering (#101703) + + This patch updates the use_device_ptr and use_device_addr clauses to use + the mapInfoOps for lowering. This allows all the types that are handled + by the map clauses such as derived types to also be supported by the + use_device_clauses. + + This is patch 1/2 in a series of patches. + + Co-authored-by: Raghu Maddhipatla raghu.maddhipatla@amd.com + +commit 8586d0330e36b22496f9ba5ed116bc1aac5a1f28 +Author: vdonaldson <37090318+vdonaldson@users.noreply.github.com> +Date: Fri Aug 30 09:07:30 2024 -0400 + + [flang] Don't generate empty else blocks (#106618) + + Code lowering always generates fir.if else blocks for source level if + statements, whether needed or not. Change this to only generate else + blocks that are needed. + +commit d4c519e7b2ac21350ec08b23eda44bf4a2d3c974 +Author: Valentin Clement (バレンタイン クレメン) +Date: Thu Aug 29 22:37:20 2024 -0700 + + [flang][cuda] Do inline allocation/deallocation in device code (#106628) + + ALLOCATE and DEALLOCATE statements can be inlined in device function. + This patch updates the condition that determined to inline these actions + in lowering. 
+ + This avoid runtime calls in device function code and can speed up the + execution. + + Also move `isCudaDeviceContext` from `Bridge.cpp` so it can be used + elsewhere. + +commit 0a41c8e7a050c837c609cbcbc8342024701cd14b +Author: Valentin Clement (バレンタイン クレメン) +Date: Thu Aug 29 11:27:42 2024 -0700 + + [flang][cuda] Avoid generating cuf.data_transfer in OpenACC region (#106435) + + `cuf.data_transfer` will be converted to runtime calls to cuda runtime + api and these are not supported in device code. assignment in OpenACC + region will be handled by the OpenACC code gen so we avoid to generate + data transfer on them. + +commit 57726c440c30b3f0b5ebfaf345b0237df4430259 +Author: Sergio Afonso +Date: Thu Aug 29 11:46:23 2024 +0100 + + [Flang][OpenMP] DISTRIBUTE PARALLEL DO SIMD lowering (#106211) + + This patch adds PFT to MLIR lowering support for `distribute parallel do + simd` composite constructs. + +commit 9c8ce5fac8a05e27cca832fb3913ec986b120211 +Author: Sergio Afonso +Date: Thu Aug 29 11:45:28 2024 +0100 + + [Flang][OpenMP] DISTRIBUTE PARALLEL DO lowering (#106207) + + This patch adds PFT to MLIR lowering support for `distribute parallel + do` composite constructs. + +commit 0f206b19c3303aeb8e527b4977da2bd301464a9b +Author: Sergio Afonso +Date: Thu Aug 29 11:44:20 2024 +0100 + + [Flang][OpenMP] Move loop privatization out of dispatch (#106066) + + This patch moves the creation of `DataSharingProcessor` instances for + loop constructs out of `genOMPDispatch()` and into their corresponding + codegen functions. This is a necessary first step to enable a proper + handling of privatization on composite constructs. + + Some tests are updated due to a change of order between clause + processing and privatization. 
+ +commit 60e9fb9dae0e041cb468210f5795e9d59e70cccf +Author: Sergio Afonso +Date: Thu Aug 29 10:37:00 2024 +0100 + + [Flang][OpenMP] Don't expect block arguments using early privatization (#105842) + + There are some spots where all symbols to privatize collected by a + `DataSharingProcessor` instance are expected to have corresponding entry + block arguments associated regardless of whether delayed privatization + was enabled. + + This can result in compiler crashes if a `DataSharingProcessor` instance + created with `useDelayedPrivatization=false` is queried in this way. The + solution proposed by this patch is to provide another public method to + query specifically delayed privatization symbols, which will either be + empty or point to the complete set of symbols to privatize accordingly. + +commit ccbee7116b1d55ab578632635dbf5a7352bbdace +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue Aug 27 17:36:31 2024 -0700 + + [flang][cuda] Use declare op results instead of memref (#106287) + + #106120 Simplify the data transfer when possible by using the reference + and a shape. This bypass the declare op. In order to keep the declare op + around, use the second results of the declare op which achieve the same. + +commit 900cd627582349381bcc0ee74054ca4d9efb55df +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue Aug 27 10:03:15 2024 -0700 + + [flang][cuda] Simplify data transfer when possible (#106120) + + When possible, avoid using descriptors and use the reference and the + shape for data_transfer. + +commit 7af61d5cf464f1d716c82bc77907fa3fe4ebc841 +Author: Valentin Clement (バレンタイン クレメン) +Date: Mon Aug 26 09:50:17 2024 -0700 + + [flang][cuda] Add shape to cuf.data_transfer operation (#104631) + + When doing data transfer with dynamic sized array, we are currently + generating a data transfer between two descriptors. If the shape values + can be provided, we can keep the data transfer between two references. + This patch adds the shape operands to the operation. 
+ + This will be exploited in lowering in a follow up patch. + +commit f4cf93fb509c53771d61a973f27be9b1a90dee0a +Author: agozillon +Date: Fri Aug 23 19:48:43 2024 +0200 + + [Flang][OpenMP] Align map clause generation and fix issue with non-shared allocations for assumed shape/size descriptor types (#97855) + + This PR aims to unify the map argument generation behavior across both + the implicit capture (captured in a target region) and the explicit + capture (process map), currently the varPtr field of the MapInfo for the + same variable will be different depending on how it's captured. This PR + tries to align that across the generations of MapInfoOp in the OpenMP + lowering. + + Currently, I have opted to utilise the rawInput (input memref to a HLFIR + DeclareInfoOp) as opposed to the addr field which includes more + information. The side effect of this is that we have to deal with + BoxTypes less often, which will result in simpler maps in these cases. + The negative side effect of this is that we don't have access to the + bounds information through the resulting value, however, I believe the + bounds information we require in our case is still appropriately stored + in the map bounds, and this seems to be the case from testing so far. + + The other fix is for cases where we end up with a BoxType argument into + a function (certain assumed shape and sizes cases do this) that has no + fir.ref wrapping it. As we need the Box to be a reference type to + actually utilise the operation to access the base address stored inside + and create the correct mappings we currently generate an intermediate + allocation in these cases, and then store into it, and utilise this as + the map argument, as opposed to the original. + + However, as we were not sharing the same intermediate allocation across + all of the maps for a variable, this resulted in errors in certain cases + when detaching/attaching the data e.g. via enter and exit. 
This PR + adjusts this for cases + + Currently we only maintain tracking of all intermediate allocations for + the current function scope, as opposed to module. Primarily as the only + case I am aware of that this is required is in cases where we pass + certain types of arguments to functions (so I opted to minimize the + overhead of the pass for now). It could likely be extended to module + scope if required if we find other cases where it's applicable and + causing issues. + +commit aa875cfe11ddec239934e37ce07c1cf7804bb73b +Author: Sergio Afonso +Date: Tue Aug 20 11:09:54 2024 +0100 + + [Flang][OpenMP] Prevent re-composition of composite constructs (#102613) + + After decomposition of OpenMP compound constructs and assignment of + applicable clauses to each leaf construct, composite constructs are then + combined again into a single element in the construct queue. This helped + later lowering stages easily identify composite constructs. + + However, as a result of the re-composition stage, the same list of + clauses is used to produce all MLIR operations corresponding to each + leaf of the original composite construct. This undoes existing logic + introducing implicit clauses and deciding to which leaf construct(s) + each clause applies. + + This patch removes construct re-composition logic and updates Flang + lowering to be able to identify composite constructs from a list of leaf + constructs. As a result, the right set of clauses is produced for each + operation representing a leaf of a composite construct. + + PR stack: + - #102612 + - #102613 + +commit 3a3990cb05858e7892a4825c677891a980f1cea8 +Author: Sergio Afonso +Date: Thu Aug 15 10:15:26 2024 +0100 + + [Flang][OpenMP] Move assert for wrapper syms and block args to genLoopNestOp (#103731) + + This patch adds an assert to `genLoopNestClauses` to ensure the number + of symbols and corresponding loop wrapper entry block arguments have the + same size. 
This is checked by some of the callers, but it makes more + sense moving it into the function itself and avoid having to replicate + it. + +commit b8b82756cb6a8ee71ef1d8f76542458bffdc1538 +Author: Sergio Afonso +Date: Wed Aug 14 10:03:30 2024 +0100 + + [Flang][OpenMP][Lower] Clause lowering cleanup (#103058) + + This patch removes the `ClauseProcessor::processDefault` method due to + it having been implemented in + `DataSharingProcessor::collectDefaultSymbols` instead. + + Also, some `genXyzClauses` functions are updated to avoid triggering + TODO errors for clauses not supported by the corresponding construct and + to keep alphabetical sorting on the order in which clauses are + processed. + +commit 3c5509d9ad25ee49aa68ab0c60d73d9587635b62 +Author: Kazu Hirata +Date: Mon Aug 12 22:53:28 2024 -0700 + + [flang] Use llvm::is_contained (NFC) (#102999) + +commit 90aac06c7f49dd275a49b843b5fd91cb00d549b4 +Author: Tarun Prabhu +Date: Mon Aug 12 11:56:19 2024 -0600 + + [flang][mlir] Add llvm.ident metadata when compiling with flang + + This brings the behavior of flang in line with clang which also adds + this metadata unconditionally. + + Co-authored-by: Tarun Prabhu + +commit f2f41937f31e643471e4e37ef9d7c4eda806adc8 +Author: Akash Banerjee +Date: Mon Aug 12 15:36:25 2024 +0100 + + [OpenMP][MLIR] Set omp.composite attr for composite loop wrappers and add verifier checks (#102341) + + This patch sets the omp.composite unit attr for composite wrapper ops + and also add appropriate checks to the verifiers of supported ops for + the presence/absence of the attribute. + + This is patch 2/2 in a series of patches. Patch 1 - #102340. 
+ +commit ebf530c4e98f09366865dd8c98fff88467e7db72 +Author: Sergio Afonso +Date: Mon Aug 12 10:44:22 2024 +0100 + + [Flang][OpenMP] NFC: Use ConstructQueue::const_iterator (#102612) + + This patch replaces `ConstructQueue::iterator` arguments with + `ConstructQueue::const_iterator` where it's used as a pointer to an + element inside of a `const ConstructQueue &` passed along with it. + + Since these functions don't intend to modify the list or any elements in + it, keeping constness consistent between both makes it simpler to work + with. + +commit 10df3207434e603be5f7e9b3036d821dd5623d3a +Author: Kareem Ergawy +Date: Fri Aug 2 09:46:34 2024 +0200 + + [flang][OpenMP] Enable delayed privatization for `omp parallel` by default (#90945) + + Flips the delayed privatization switch to be on by default. After the + recent fixes related to delayed privatization, the gfortran test suite + runs successfully with delayed privatization turned on by default for + `omp parallel`. + +commit fdfeea5bd6763277b5078e33e17e1bfc521a6cba +Author: Sergio Afonso +Date: Mon Jul 29 10:56:45 2024 +0100 + + [MLIR][OpenMP][Flang] Normalize clause arguments names (#99505) + + Currently, there are some inconsistencies to how clause arguments are + named in the OpenMP dialect. Additionally, the clause operand structures + associated to them also diverge in certain cases. The purpose of this + patch is to normalize argument names across all `OpenMP_Clause` tablegen + definitions and clause operand structures. + + This has the benefit of providing more consistent representations for + clauses in the dialect, but the main short-term advantage is that it + enables the development of an OpenMP-specific tablegen backend to + automatically generate the clause operand structures without breaking + dependent code. + + The main re-naming decisions made in this patch are the following: + - Variadic arguments (i.e. multiple values) have the "_vars" suffix. 
+ This and other similar suffixes are removed from array attribute + arguments. + - Individual required or optional value arguments do not have any suffix + added to them (e.g. "val", "var", "expr", ...), except for `if` which + would otherwise result in an invalid C++ variable name. + - The associated clause's name is prepended to argument names that don't + already contain it as part of its name. This avoids future collisions + between arguments named the same way on different clauses and adding + both clauses to the same operation. + - Privatization and reduction related arguments that contain lists of + symbols pointing to privatizer/reducer operations use the "_syms" + suffix. This removes the inconsistencies between the names for + "copyprivate_funcs", "[in]reductions", "privatizers", etc. + - General improvements to names, replacement of camel case for snake + case everywhere, etc. + - Renaming of operation-associated operand structures to use the + "Operands" suffix in place of "ClauseOps", to better differentiate + between clause operand structures and operation operand structures. + - Fields on clause operand structures are sorted according to the + tablegen definition of the same clause. + + The assembly format for a few arguments is updated to better reflect the + clause they are associated with: + - `chunk_size` -> `dist_schedule_chunk_size` + - `grain_size` -> `grainsize` + - `simd` -> `par_level_simd` + +commit 68a0d0c76223736351fd7c452bca3ba9d80ca342 +Author: Kareem Ergawy +Date: Wed Jul 24 13:48:47 2024 +0200 + + [flang][OpenMP] Handle common blocks in delayed privatization (#100317) + + Adds proper mapping of common block elements to block arguments in + parallel regions when delayed privatization is enabled. 
+ +commit 0ee0eeb4bb9be6aeef6c84121ca1af463840fb6a +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue Jul 23 09:49:17 2024 -0700 + + [flang] Enhance location information (#95862) + + Add inclusion location information by using FusedLocation with + attribute. + + More context here: + https://discourse.llvm.org/t/rfc-enhancing-location-information/79650 + +commit 4b9fab591916eec9fd1942f37afe3b137b564089 +Author: David Truby +Date: Fri Jul 19 15:55:36 2024 +0100 + + [flang][OpenMP] Implement lastprivate with collapse (#99500) + + This patch enables the lastprivate clause to be used in the presence of + the collapse clause. + + Note: the way we currently implement lastprivate means that this adds a + large number of compare instructions to the end of every iteration of + the loop. This is a clearly non-optimal thing to do, but lastprivate in + general will need re-implementing to prevent this. This is planned as + part of the delayed privatization work. This current implementation is + just a stop-gap measure as generating sub-optimal but working code is + better than crashing out. + +commit 3ad7108c3cf843cac6301db3f73ccea9661bc4d3 +Author: Valentin Clement (バレンタイン クレメン) +Date: Wed Jul 17 08:39:18 2024 -0700 + + [flang][cuda] Avoid temporary when RHS is a logical constant (#99078) + + Enhance the detection of constant on the RHS for logical cases so we + don't create a temporary. + +commit f1d3fe7aae7867b5de96b84d6d26b5c9f02f209a +Author: Alexis Perry-Holby +Date: Tue Jul 16 09:48:24 2024 -0600 + + Add basic -mtune support (#98517) + + Initial implementation for the -mtune flag in Flang. + + This PR is a clean version of PR #96688, which is a re-land of PR #95043 + +commit e34e739ba88fed7450d232e29b523d247af365ec +Author: Anchu Rajendran S +Date: Fri Jul 12 10:14:38 2024 -0700 + + Adding Changes for invoking Masked Operation (#98423) + + PR adds changes to the flang frontend to create the `MaskedOp` when + `masked` directive is used in the input program. 
Omp masked is + introduced in 5.2 standard and allows a parallel region to be executed + by threads specified by a programmer. This is achieved with the help of + filter clause which helps to specify thread id expected to execute the + region. + + Other related PRs: + - [Fortran Parsing and Semantic + Support](https://github.com/llvm/llvm-project/pull/91432) - Merged + - [MLIR Support](https://github.com/llvm/llvm-project/pull/96022/files) + - Merged + - [Lowering Support](https://github.com/llvm/llvm-project/pull/98401) - + Under Review + +commit de90391ea88c51da8bcde95206f3f31ecbaf97a3 +Author: Tom Eccles +Date: Fri Jul 12 10:29:21 2024 +0100 + + [flang][OpenMP] Lower REDUCTION clause for SECTIONS (#97858) + + The tricky bit here is that we need to generate the reduction symbol + mapping inside each of the nested SECTION constructs. This is a bit + similar to omp.canonical_loop inside of omp.wsloop, except the SECTION + constructs come from the PFT. + + To make this work I moved the lowering of the SECTION constructs inside + of the lowering SECTIONS (where reduction information is still + available). This subverts the normal control flow for OpenMP lowering a + bit. + + One alternative option I investigated would be to generate the SECTION + CONSTRUCTS as normal as though there were no reduction, and then to fix + them up after control returns back to genSectionsOp. The problem here is + that the code generated for the section body has the wrong symbol + mapping for the reduction variable, so all of the nested code has to be + patched up. In my prototype version this was even more hacky than what + the solution I settled upon. 
+ +commit 9b6504e98359f5d14fdaa353b2789e7e95239f96 +Author: Valentin Clement (バレンタイン クレメン) +Date: Thu Jul 11 17:15:54 2024 -0700 + + [flang][cuda] Make sure to issue freemem for the allocated temp (#98078) + + When implicit data transfer is created, make sure we generate the + `freemem` op on the `allocmem` result value and not the declare op + value. + +commit bd7b16217bbac4b1e1a25c7bf9566db715ca9b10 +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue Jul 9 10:13:00 2024 -0700 + + [flang][cuda] Add conversion for stream value in cuf kernel directive (#98082) + + The stream value is defined as an i32 value in the operation. Add a + conversion so the declared integer can be different and an i32 value. + +commit 918ac62916d48649f224f8c54837d25baff97a08 +Author: Sergio Afonso +Date: Tue Jul 9 11:17:21 2024 +0100 + + [Flang][OpenMP] Add lowering support for DISTRIBUTE SIMD (#97819) + + This patch adds support for lowering 'DISTRIBUTE SIMD' constructs to + MLIR. Translation of `omp.distribute` operations to LLVM IR is still not + supported, so its composition with `omp.simd` isn't either. + +commit 52d2d8200807357a582e089eaa95692b2c77da2e +Author: Sergio Afonso +Date: Tue Jul 9 11:15:35 2024 +0100 + + [Flang][OpenMP] Add lowering support for DO SIMD (#97718) + + This patch adds support for lowering 'DO SIMD' constructs to MLIR. SIMD + information is now stored in an `omp.simd` loop wrapper, which is + currently ignored by the OpenMP dialect to LLVM IR translation stage. + + The end result is that runtime behavior of compiled 'DO SIMD' constructs + does not change after this patch, so 'DO SIMD' still runs like 'DO' + (i.e. SIMD width = 1). However, all of the required information is now + present in the resulting MLIR representation. 
+ + To avoid confusion, the previous wsloop-simd.f90 lit test is renamed to + wsloop-schedule.f90 and a new wsloop-simd.f90 test is created to check + the addition of SIMD clauses to the `omp.simd` operation produced when a + 'DO SIMD' construct is lowered to MLIR. + +commit 2b56005fd91de3adba1957e70575d5461329178b +Author: Sergio Afonso +Date: Tue Jul 9 10:32:20 2024 +0100 + + [Flang][OpenMP] Refactor loop-related lowering for composite support (#97566) + + This patch splits the lowering for `omp.loop_nest` into its own function + and updates lowering for all supported loop wrappers to stop creating + this operation themselves. + + Lowering functions for loop constructs are split into "wrapper" and + "standalone" variants, where the "wrapper" version only creates the + specific operation with nothing inside of it and the "standalone" + version calls the former and also handles clause processing and creates + the nested `omp.loop_nest`. + + "Wrapper" lowering functions can be used by "composite" lowering + functions in follow-up patches, minimizing code duplication. + + Tests broken as a result of reordering between the processing of the + loop wrapper's and the nested `omp.loop_nest`'s clauses are also + updated. + +commit 720b958953d1dd34c8e3a52588ab26c52edfc2a6 +Author: Sergio Afonso +Date: Fri Jul 5 10:38:03 2024 +0100 + + [Flang][OpenMP] NFC: Share DataSharingProcessor creation logic for all loop directives (#97565) + + This patch moves the logic associated with the creation of a + `DataSharingProcessor` instance for loop-associated OpenMP leaf + constructs to the `genOMPDispatch` function, avoiding code duplication + for standalone and composite loop constructs. + + This also prevents privatization-related allocations to be later made + inside of loop wrappers when support for composite constructs is + implemented. 
+ +commit 2d0c4c363b4b39014b469c30234cf06894e06d6a +Author: Sergio Afonso +Date: Fri Jul 5 10:13:30 2024 +0100 + + [Flang][OpenMP] Remove unused OpWithBodyGenInfo attributes (#97572) + + This patch removes the `outerCombined`, `reductionSymbols` and + `reductionTypes` attributes from the `OpWithBodyGenInfo` structure and + their uses, as they never impact the lowering process or its output. + + The `outerCombined` variable is always set to `false`, so in practice it + doesn't represent what its name indicates. Furthermore, initializing it + correctly can result in privatization not being performed in cases where + it should (at least tests doing this together with composite construct + support pointed me in that direction). It seems to be tied to the early + privatization approach, where a redundant alloca could possibly be + avoided in certain cases. With the transition to delayed privatization, + it seems like it won't serve that purpose anymore, since the decision of + what and where privatization-related allocations are inserted will be + postponed to the MLIR to LLVM IR translation stage. Since this feature + is already currently not being used, its potential benefit appears to be + minor and it won't make sense to do once the delayed privatization + approach is rolled out, I propose removing it. + + The `reductionSymbols` and `reductionTypes` variables are set in certain + cases but never used. Unless there's a plan where these will be needed, + in which case it would be a better alternative to document it, I believe + we should also remove them. + +commit 817f0d9d3099a25ea83fcf633071ccaaeac09954 +Author: Sergio Afonso +Date: Thu Jul 4 15:31:20 2024 +0100 + + [Flang][OpenMP] NFC: Remove unused argument for omp.target lowering (#97564) + + This patch removes the `outerCombined` argument from `genTargetOp()` and + the `processReduction` argument from `genTargetClauses()`, as they + aren't used. 
+ +commit 03579455bd941da6278f883ed8827ef0fbeb5e50 +Author: agozillon +Date: Wed Jul 3 07:07:53 2024 +0200 + + [Flang][OpenMP] More elegantly handle declare target in unnamed program (#95834) + + This PR is related to the following issue: + + https://github.com/llvm/llvm-project/issues/63362 + + It tries to solve the crash (which is now slightly different, since the + issue has been languishing for a while sorry about that I missed the + original issue ping). + + The crash occurs due to trying to access the symbol of an + undefined/unnamed main when trying to find a declare target symbol that + has not been specified (but can be assumed based on its residence in a + function or interface). + + The solution in this PR will check if we're trying to retrieve a main + symbol, and then if that is the case, we make sure it exists (due to + being named) before we attempt to retrieve it, this avoids the crash. + + However, that's only part of the issue in the above example, the other + is the significant amount of nested directives, I think we are still a + little while away from handling this, I have added a reduced variation + of the test in the issue as a replicator which contains a lesser number + of nesting directives. To push the issue along further, it will likely + be a case of working through a number of variations of nested directives + in conjunction with target + parallel. + + However, this PR pushes the issue above to the point where the issue + encountered is identical to the following: + https://github.com/llvm/llvm-project/issues/67231 + +commit 66d5ca2a3d8df780951ce2987157ef03e73393c7 +Author: jeanPerier +Date: Tue Jul 2 15:19:49 2024 +0200 + + Reland "[flang] add extra component information in fir.type_info" (#97404) + + Reland #96746 with the proper Support/CMakelist.txt change. + + fir.type does not contain all Fortran level information about + components. For instance, component lower bounds and default initial + value are lost. 
For correctness purpose, this does not matter because + this information is "applied" in lowering (e.g., when addressing the + components, the lower bounds are reflected in the hlfir.designate). + + However, this "loss" of information will prevent the generation of + correct debug info for the type (needs to know about lower bounds). The + initial value could help building some optimization pass to get rid of + initialization runtime calls. + + This patch adds lower bound and initial value information into + fir.type_info via a new fir.dt_component operation. This operation is + generated only for component that needs it, which helps keeping the IR + small for "boring" types. + + In general, adding Fortran level info in fir.type_info will allow + delaying the generation of "type descriptors" gobals that are very + verbose in FIR and make it hard to work with FIR dumps from applications + with many derived types. + +commit 29cdc8f9ca58411992d3fa5afd89e0628df24679 +Author: Leandro Lupori +Date: Mon Jul 1 14:10:35 2024 -0300 + + [flang][OpenMP] Fix nested privatization of allocatable (#96968) + + In nested constructs where a given variable is privatized more than + once, using the default clause, the innermost host association symbol + will point to the previous host association symbol. + Such symbol lacks the allocatable attribute and can't be used to + generate the type of the symbol to be cloned. Use the ultimate + symbol instead. + + Fixes #85594, #80398 + +commit 03d9a317725cdc03a0558eb49ff53bcc5c45dd08 +Author: Sergio Afonso +Date: Mon Jul 1 11:08:14 2024 +0100 + + [Flang][OpenMP] Update flang with changes to the OpenMP dialect (#92524) + + This patch applies fixes after the updates to OpenMP clause operands, as + well as updating some tests that were impacted by changes to the + ordering or assembly format of some clauses in MLIR. 
+ +commit 4a746e50b14f252b40e03860562636dd1ded8044 +Author: Kareem Ergawy +Date: Fri Jun 28 05:20:59 2024 +0200 + + [NFC][flang] Remove unused `converter` parameter. (#96854) + + Removes an unused parameter in 2 utils that generate bounds ops. + +commit 6a66b8224d8cbdb6156b9a12d9339fee71898941 +Author: jeanPerier +Date: Thu Jun 27 19:22:48 2024 +0200 + + Revert "[flang] add extra component information in fir.type_info" (#96937) + + Reverts llvm/llvm-project#96746 + Breaking shared library builds: + https://lab.llvm.org/buildbot/#/builders/89/builds/931 + +commit 1448ed2000ff0be17025dab0aad7412d054425eb +Author: jeanPerier +Date: Thu Jun 27 18:59:03 2024 +0200 + + [flang] add extra component information in fir.type_info (#96746) + + fir.type does not contain all Fortran level information about + components. For instance, component lower bounds and default initial + value are lost. For correctness purpose, this does not matter because + this information is "applied" in lowering (e.g., when addressing the + components, the lower bounds are reflected in the hlfir.designate). + + However, this "loss" of information will prevent the generation of + correct debug info for the type (needs to know about lower bounds). The + initial value could help building some optimization pass to get rid of + initialization runtime calls. + + This patch adds lower bound and initial value information into + fir.type_info via a new fir.dt_component operation. This operation is + generated only for component that needs it, which helps keeping the IR + small for "boring" types. + + In general, adding Fortran level info in fir.type_info will allow + delaying the generation of "type descriptors" globals that are very + verbose in FIR and make it hard to work with FIR dumps from applications + with many derived types.
+ +commit b4ab52c8e71e819c13606de3500043eaa701e1ea +Author: harishch4 +Date: Thu Jun 27 11:58:12 2024 +0530 + + [Flang][OpenMP] Lowering Order clause to MLIR (#96730) + +commit 8dd9494056d6797144dfabbbfb6d478c95375019 +Author: Tarun Prabhu +Date: Tue Jun 25 13:25:39 2024 -0600 + + Revert "[flang] Add basic -mtune support" (#96678) + + Reverts llvm/llvm-project#95043 + +commit aec735cf476c3975b026aa79fa40dda06a27fac3 +Author: agozillon +Date: Tue Jun 25 20:54:04 2024 +0200 + + [Flang][OpenMP][MLIR] Fix common block mapping for regular and declare target link (#91829) + + This PR attempts to fix common block mapping for regular mapping of + these types as well as when they have been marked as "declare target + link". This PR should allow correct mapping of both the members of a + common block and the full common block via its block symbol. + + The main changes were some adjustments to the Fortran OpenMP lowering to + HLFIR/FIR, the lowering of the LLVM+OpenMP dialect to LLVM-IR and + adjustments to the way the we handle target kernel map argument + rebinding inside of the OMPIRBuilder. + + For the Fortran OpenMP lowering were two changes, one to prevent the + implicit capture of common block members when the common block symbol + itself has been marked and the other creates intermediate member access + inside of the target region to be used in-place of those external to the + target region, this prevents external usages breaking the + IsolatedFromAbove pact. + + In the latter case, there was an adjustment to the size calculation for + types to better handle cases where we pass an array as the type of a map + (as opposed to the bounds and the type of the element), which occurs in + the case of common blocks. 
There is also some adjustment to how + handleDeclareTargetMapVar handles renaming of declare target symbols in + the module to the reference pointer, now it will only apply to those + within the kernel that is currently being generated and we also perform + a modification to replace constants with instructions as necessary as we + cannot replace these with our reference pointer (non-constant and + constants do not mix nicely). + + In the case of the OpenMPIRBuilder some changes were made to defer + global symbol rebinding to kernel arguments until all other arguments + have been rebound. This makes sure we do not replace uses that may refer + to the global (e.g. a GEP) but are themselves actually a separate + argument that needs bound. + + Currently "declare target to" still needs some work, but this may be the + case for all types in conjunction with "declare target to" at the + moment. + +commit a790279bf2a8be2f9c42bf80f55a63933e398d0e +Author: Alexis Perry-Holby +Date: Tue Jun 25 11:39:35 2024 -0600 + + [flang] Add basic -mtune support (#95043) + + This PR adds -mtune as a valid flang flag and passes the information + through to LLVM IR as an attribute on all functions. No specific + architecture optimizations are added at this time. + +commit 952bdaaf79c1e5d7364160b21de0cd1295cdfbd8 +Author: Leandro Lupori +Date: Tue Jun 25 09:25:41 2024 -0300 + + [flang][OpenMP] Fix copyprivate allocatable/pointer lowering (#95975) + + The lowering of copyprivate clauses with allocatable or pointer + variables was incorrect. This happened because the values passed to + copyVar() are always wrapped in SymbolBox::Intrinsic, which + resulted in allocatable/pointer variables being handled as regular + ones. + + This is fixed by providing to copyVar() the attributes of the + variables being copied, to make it possible to detect and handle + allocatable/pointer variables correctly. 
+ + Fixes #95801 + +commit 8e8dccdecd4a5302fcfad33b4ee1282ae808b106 +Author: Valentin Clement (バレンタイン クレメン) +Date: Wed Jun 19 13:35:02 2024 -0700 + + [flang][cuda] Do not consider PINNED as device attribute (#95988) + + PINNED is a CUDA data attribute meant for the host variables. Do not + consider it when computing the number of device variables in assignment + for the cuda data transfer. + +commit 506b4cdae0929ff4bc7174cb580b5e55b8a74a0b +Author: David Truby +Date: Tue Jun 18 14:25:56 2024 +0100 + + [flang] Change vector always errors to warnings (#95908) + +commit 77d8cfb3c50e3341d65af1f9e442004bbd77af9b +Author: Alexander Shaposhnikov <6532716+alexander-shaposhnikov@users.noreply.github.com> +Date: Mon Jun 17 12:59:04 2024 -0700 + + [Flang] Switch to common::visit more call sites (#90018) + + Switch to common::visit more call sites. + + Test plan: ninja check-all + +commit 85f4593e856e5034c5de1e6bbea13fb59e1995f5 +Author: khaki3 <47756807+khaki3@users.noreply.github.com> +Date: Mon Jun 17 09:21:30 2024 -0700 + + [flang] Add a REDUCE clause to each nested loop (#95555) + + For DO CONCURRENT REDUCE, every nested loop should have a REDUCE clause + so that we can lower reduction without analysis. + +commit c6b6e18c4d25305ab98b6eab752de99ea4e15344 +Author: David Truby +Date: Fri Jun 14 14:10:41 2024 +0100 + + [flang] Implement !DIR$ VECTOR ALWAYS (#93830) + + This patch implements support for the VECTOR ALWAYS directive, which + forces + vectorization to occur when possible regardless of a decision by the + cost + model. This is done by adding an attribute to the branch into the loop + in LLVM + to indicate that the loop should always be vectorized. + + This patch only implements this directive on plain structured do loops + without labels. Support for unstructured loops and array + expressions is planned for future patches.
+ +commit 7ffeaf0e187b41994f63ae82e73e123b942cd16b +Author: harishch4 +Date: Fri Jun 14 09:37:38 2024 +0530 + + [MLIR][Flang][OpenMP] Implement lowering simd aligned to MLIR (#95198) + + Rebased @DominikAdamski patch: https://reviews.llvm.org/D142722 + + --------- + + Co-authored-by: Dominik Adamski + Co-authored-by: Tom Eccles + +commit 7665d3d90da7f32e56cb57eb192dc8f189730686 +Author: Iman Hosseini +Date: Wed Jun 12 19:18:41 2024 +0100 + + [flang] Add reductions for CUF Kernels: Lowering (#95184) + + * Add reductionOperands and reductionAttrs to cuf's KernelOp. + * Parsing is already working and the tree has the info: here I make the + Bridge emit the updated KernelOp with reduction information added. + * Check |reductionAttrs| = |reductionOperands| in verifier + * Add a test + @clementval @vzakhari + + --------- + + Co-authored-by: Iman Hosseini + Co-authored-by: Valentin Clement (バレンタイン クレメン) + +commit 87374a8cffb6b6f589e8810a4d8502623e9d0268 +Author: vdonaldson <37090318+vdonaldson@users.noreply.github.com> +Date: Wed Jun 12 09:35:14 2024 -0400 + + [flang] Add support for lowering directives at the CONTAINS level (#95123) + + There is currently support for lowering directives that appear outside + of a module or procedure, or inside the body of a module or procedure. 
+ Extend this to support directives at the CONTAINS level of a module or + procedure, such as directives 3, 5, 7 9, and 10 in: + + !dir$ some directive 1 + module m + !dir$ some directive 2 + contains + !dir$ some directive 3 + subroutine p + !dir$ some directive 4 + contains + !dir$ some directive 5 + subroutine s1 + !dir$ some directive 6 + end subroutine s1 + !dir$ some directive 7 + subroutine s2 + !dir$ some directive 8 + end subroutine s2 + !dir$ some directive 9 + end subroutine p + !dir$ some directive 10 + end module m + !dir$ some directive 11 + + This is done by looking for CONTAINS statements at the module or + procedure level, while ignoring CONTAINS statements at the derived type + level. + +commit fc1c34bbcb7811ebdbb623c5a4473c4f186c434d +Author: Sergio Afonso +Date: Wed Jun 12 12:34:23 2024 +0100 + + [Flang][OpenMP][Lower] Add lowering support of OpenMP distribute to MLIR (#67798) + + This patch adds support for lowering the OpenMP DISTRIBUTE directive + from PFT to MLIR. It only supports standalone DISTRIBUTE, support for + composite constructs will come in follow-up PRs. + +commit e7d569a0faa833623af59d4eab5d6277ce031d9e +Author: Valentin Clement +Date: Mon Jun 10 08:50:08 2024 -0700 + + [flang] Fix copy creation in #94718 + +commit f11e08fb26642fddebdefca5bec933fe39e4bd03 +Author: khaki3 <47756807+khaki3@users.noreply.github.com> +Date: Mon Jun 10 08:41:05 2024 -0700 + + [flang] Generate fir.do_loop reduce from DO CONCURRENT REDUCE clause (#94718) + + Derived from #92480. This PR updates the lowering process of DO + CONCURRENT to support F'2023 REDUCE clause. The structure + `IncrementLoopInfo` is extended to have both reduction operations and + symbols in `reduceSymList`. The function `getConcurrentControl` + constructs `reduceSymList` for the innermost loop. Finally, + `genFIRIncrementLoopBegin` builds `fir.do_loop` with reduction operands. 
+ +commit 1539da4601448711fcfa622e26e596973d58c670 +Author: Kareem Ergawy +Date: Fri Jun 7 18:08:25 2024 +0200 + + [flang][OpenMP] Add `--openmp-enable-delayed-privatization-staging` flag (#94749) + +commit 913a8244fe8687df1f27b61c87aa23cf4fcbe84e +Author: Kareem Ergawy +Date: Fri Jun 7 14:44:01 2024 +0200 + + [flang][OpenMP] Lower `target .. private(..)` to `omp.private` ops (#94195) + + Extends delayed privatization support to `target .. private(..)`. With + this PR, `private` is supported for `target` **only** in delayed + privatization mode. + +commit 8b18f2fe066b9f895185f1d94c6cf34901590164 +Author: Krzysztof Parzyszek +Date: Wed Jun 5 13:38:28 2024 -0500 + + [flang][OpenMP] Add `sym()` member function to omp::Object (#94493) + + The object identity requires more than just `Symbol`. Don't use `id()` + to get the Symbol associated with the object, because the return value + will need to change. Instead use `sym()` which is added for that reason. + +commit b9549261e218cee2ad1305fb7272b831799b7bfe +Author: Sergio Afonso +Date: Wed Jun 5 14:43:58 2024 +0100 + + [Flang][OpenMP] Add -fopenmp-force-usm option to flang (#94359) + + This patch enables the `-fopenmp-force-usm` option to be passed to the + flang driver, which forwards it to the compiler frontend. This flag, + when set, results in the introduction of the `unified_shared_memory` bit + to the `omp.requires` attribute of the top-level module operation. + + This is later combined with any other target device-related REQUIRES + clauses that may have been explicitly set in the compilation unit. + +commit c7593344f48e64af29fd9512852f24f9ebe5a4c6 +Author: Peter Klausler <35819229+klausler@users.noreply.github.com> +Date: Mon Jun 3 12:58:39 2024 -0700 + + [flang] Better error message for RANK(NULL()) (#93577) + + We currently complain that the argument may not be a procedure, which is + confusing. Distinguish the NULL() case from other error cases (which are + indeed procedures).
And clean up the utility predicates used for these + tests -- the current IsProcedure() is really just a test for a procedure + designator. + +commit d1aa9bac3c8ecc30fcc5d4d80a1f70c729aec909 +Author: jeanPerier +Date: Mon Jun 3 17:20:07 2024 +0200 + + [flang] lower select rank (#93967) + + Lower select rank according to [assumed-rank lowering design + doc](https://github.com/llvm/llvm-project/blob/main/flang/docs/AssumedRank.md). + + The construct is lowered using fir.box_rank and fir.select_case + operation and, for the non pointer/allocatable case, a + fir.is_assumed_size + conditional branch before the select_case to deal + with the assumed-size case. + + The way the CFG logic is generated, apart from the extra conditional + branch for assumed-size, is similar to what is done for SELECT CASE + lowering, hence the sharing of the construct level visitor. + For the CFG parts. The main difference is that we need to keep track of + the selector to cook it and map it inside the cases (hence the new + members of the ConstructContext). + + The only TODOs left are to deal with the RANK(*) case for polymorphic + entities and PDTs. I will do the polymorphic case in a distinct patch, + this patch has enough content. + + Fortran::evaluate::IsSimplyContiguous change is needed to avoid generating + copy-in/copy-out runtime calls when passing the RANK(*) associating + entity to some implicit interface. + +commit 6af4118f1557eb7ac07147607bd23e90c5bf2b35 +Author: Kareem Ergawy +Date: Mon May 27 14:26:52 2024 +0200 + + Reapply #91116 with fix (#93160) + + This PR contains 2 commits: + 1. A commit to reapply changes introduced #91116 (was reverted earlier + due to test suite failures) + 2. A commit containing a possible solution for the issue causing the + test suite failures. In particular, it introduces a simple symbol + visitor class to keep track of the current active OMP construct and + marking this active construct as the scope defining the symbol being + visisted. 
+ +commit 1a2a0c0dc9aab6f440033f36ff2323685080f46a +Author: Anchu Rajendran S +Date: Thu May 23 13:46:35 2024 +0530 + + Fixing the location attribute added to mapInfoOp (#90764) + + Named location attribute added to `tgt_offload_entry` shall be used by + runtime calls like `ompx_dump_mapping_tables` to print the information + of variables that are mapped to the device. `ompx_dump_mapping_tables` + was printing the wrong location information and this change fixes it. + + A sample execution of example before the change: + ``` + omptarget device 0 info: OpenMP Host-Device pointer mappings after block at libomptarget:0:0: + + omptarget device 0 info: Host Ptr Target Ptr Size (B) DynRefCount HoldRefCount Declaration + + omptarget device 0 info: 0x0000000000206df0 0x00007f02cdc00000 20000000 1 0 at unknown:18:35 + ``` + + The change replaces unknown to the mapped symbol and location to the + declaration location. + +commit 0bc710f7c19910817ccff254c43496602635bbc9 +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue May 21 12:42:30 2024 -0700 + + [flang][cuda] Accept constant as src for cuf.data_tranfer (#92951) + + Assignment of a constant (host) to a device variable is a special case + that can be further lowered to `cudaMemset` or similar functions. This + patch update the lowering to avoid the creation of a temporary when we + assign a constant to a device variable. + +commit 1fc3ce1cdb8390ed64feea939a9555d3642439ea +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue May 21 11:23:55 2024 -0700 + + [flang][cuda] Enable data transfer for descriptors (#92804) + + Remove the TODO when data transfer is done with descriptor variables. 
+ +commit c1b5b7c19b76f8d153f7ae9350d217b74888ed93 +Author: Krzysztof Parzyszek +Date: Tue May 21 08:19:54 2024 -0500 + + [flang][Lower] Emit exiting branches from within constructs (#92455) + + When lowering IfConstruct, CaseConstruct, and SelectTypeConstruct, emit + branches that exit the construct in each block that is still + unterminated after the FIR has been generated in it. + + The same thing may be needed for SelectRankConstruct, once it's + supported. + + This eliminates the need for inserting branches in `genFIR(Evaluation)`. + + Follow-up to PR https://github.com/llvm/llvm-project/pull/91614. + +commit 6658e1a3fdfebfc9d1805029ca0e4de643634927 +Author: Anchu Rajendran S +Date: Mon May 20 21:32:41 2024 -0700 + + Adding parsing and semantic check support for omp masked (#91432) + + omp masked directive in OpenMP 5.2 allows to specify code regions which + are expected to be executed by thread ids specified by the programmer. + Filter clause of the directive allows to specify the thread id. This + change adds the parsing support for the directive + +commit 85e1124049cd8aa1e58c101e082100ba74df7e42 +Author: Muhammad Omair Javaid +Date: Tue May 21 06:50:43 2024 +0500 + + Revert "[flang][OpenMP] Try to unify induction var privatization for OMP regions. (#91116)" + + This reverts commit 2a97b507dc643b7ee3bc651b3f21b754cfba433c. + + It has broken LLVM testsuite on various bots + https://lab.llvm.org/buildbot/#/builders/184/builds/12760 + https://lab.llvm.org/buildbot/#/builders/197/builds/14376 + https://lab.llvm.org/buildbot/#/builders/179/builds/10176 + +commit 2a97b507dc643b7ee3bc651b3f21b754cfba433c +Author: Kareem Ergawy +Date: Sat May 18 08:39:58 2024 +0200 + + [flang][OpenMP] Try to unify induction var privatization for OMP regions. 
+ (#91116) + +commit 45daa4fdc68f5faa5bd5c33da052d2415cd88540 +Author: Valentin Clement (バレンタイン クレメン) +Date: Fri May 17 09:37:53 2024 -0700 + + [flang][cuda] Move CUDA Fortran operations to a CUF dialect (#92317) + + The number of operations dedicated to CUF grew and were all still in + FIR. In order to have a better organization, the CUF operations, + attributes and code are moved into their specific dialect and files. CUF + dialect is tightly coupled with HLFIR/FIR and their types. + + The CUF attributes are bundled into their own library since some + HLFIR/FIR operations depend on them and the CUF dialect depends on the + FIR types. Without having the attributes in a separate library there + would be a dependency cycle. + +commit 74a87548e5b62881108e6cd1fd63b45580fc3097 +Author: Tom Eccles +Date: Thu May 16 15:27:59 2024 +0100 + + [flang][MLIR][OpenMP] make reduction by-ref toggled per variable (#92244) + + Fixes #88935 + + Toggling reduction by-ref broke when multiple reduction clauses were + used. Decisions made for the by-ref status for later clauses could then + invalidate decisions for earlier clauses. For example, + + ``` + reduction(+:scalar,scalar2) reduction(+:array) + ``` + + The first clause would choose by value reduction and generate by-value + reduction regions, but then after this the second clause would force + by-ref to support the array argument. But by the time the second clause + is processed, the first clause has already had the wrong kind of + reduction regions generated. + + This is solved by toggling whether a variable should be reduced by + reference per variable. In the above example, this allows only `array` + to be reduced by ref.
+ +commit 7a66e4209b0b4cc0dc871a54c4f07a4b0054b5f7 +Author: Krzysztof Parzyszek +Date: Thu May 16 07:49:01 2024 -0500 + + [flang][OpenMP] Remove unnecessary `Fortran::` qualification, NFC (#92298) + + The `Fortran::` namespace is redundant for all parts of the code in this + PR, except for names of functions in their definitions. + +commit 526553b25131a69d9d6426e17c7b69c2ba27144f +Author: Yusuke MINATO +Date: Thu May 16 13:16:07 2024 +0900 + + [flang] Add nsw flag to do-variable increment with a new option (#91579) + + This patch adds nsw flag to the increment of do-variables when a new + option is enabled. + NOTE 11.10 in the Fortran 2018 standard says they never overflow. + + See also the discussion in #74709 and the following discourse post. + https://discourse.llvm.org/t/rfc-add-nsw-flags-to-arithmetic-integer-operations-using-the-option-fno-wrapv/77584/5 + +commit 4525f442fadb7cc44cc2eaede2c8ac6ba15bdf78 +Author: Krzysztof Parzyszek +Date: Wed May 15 12:01:16 2024 -0500 + + [flang][OpenMP] Don't pass clauses to op-generating functions anymore (#90108) + + Remove parameter `const List &clauses` from functions that take + construct queue. The clauses should now be accessed from the construct + queue. + +commit 415616daa0bdf6c0065c4c1967f1c4050e6ea836 +Author: Krzysztof Parzyszek +Date: Tue May 14 07:33:07 2024 -0500 + + [flang][OpenMP] Lower standalone ops via OMP dispatch, NFC (#92045) + + This moves lowering of standalone OpenMP ops into the dispatch function. + Follow-up to PR90098. + +commit c7c5666aac543a49b485a133f4a94865e2613a43 +Author: jeanPerier +Date: Tue May 14 13:34:46 2024 +0200 + + [flang] Do not hoist all scalar sub-expressions from WHERE constructs (#91395) + + The HLFIR pass lowering WHERE (hlfir.where op) was too aggressive in its + hoisting of scalar sub-expressions from LHS/RHS/MASKS outside of the + loops generated for the WHERE construct. 
+ This violated F'2023 10.2.3.2 point 10 that stipulated that elemental + operations must be evaluated only for elements corresponding to true + values, because scalar operations are still elemental, and hoisting them + is invalid if they could have side effects (e.g., division by zero) and + if the MASK is always false (i.e., the loop body is never evaluated). + + The difficulty is that 10.2.3.2 point 9 mandates that nonelemental + function must be evaluated before the loops. So it is not possible to + simply stop hoisting non hlfir.elemental operations. + Marking calls with an elemental/nonelemental attribute would not allow + the pass to be correct if inlining is run before and drops this + information, besides, extracting the argument tree that may have been + CSE-ed with the rest of the expression evaluation would be a bit + cumbersome. + + Instead, lower nonelemental calls into a new hlfir.exactly_once + operation that will allow retaining the information that the operations + contained inside its region must be hoisted. This allows inlining to + operate before if desired in order to improve alias analysis. + + The LowerHLFIROrderedAssignments pass is updated to only hoist the + operations contained inside hlfir.exactly_once bodies. + +commit 1066eb55477044a3a92f3a40471375194dfcdbc8 +Author: Kazu Hirata +Date: Mon May 13 09:33:43 2024 -0700 + + [flang] Fix a warning + + This patch fixes: + + flang/lib/Lower/OpenMP/OpenMP.cpp:2346:14: error: unused variable + 'origDirective' [-Werror,-Wunused-variable] + +commit be7c9e39572d876c16b6a8d7f4addaf9409071ff +Author: Krzysztof Parzyszek +Date: Mon May 13 08:09:24 2024 -0500 + + [flang][OpenMP] Decompose compound constructs, do recursive lowering (#90098) + + A compound construct with a list of clauses is broken up into individual + leaf/composite constructs. Each such construct has the list of clauses + that apply to it based on the OpenMP spec. + + Each lowering function (i.e.
a function that generates MLIR ops) is now + responsible for generating its body as described below. + + Functions that receive AST nodes extract the construct, and the clauses + from the node. They then create a work queue consisting of individual + constructs, and invoke a common dispatch function to process (lower) the + queue. + + The dispatch function examines the current position in the queue, and + invokes the appropriate lowering function. Each lowering function + receives the queue as well, and once it needs to generate its body, it + either invokes the dispatch function on the rest of the queue (if any), + or processes nested evaluations if the work queue is at the end. + + Re-application of ca1bd5995f6ed934f9187305190a5abfac049173 with fixes for + compilation errors. + +commit 25a3ba33153e99c4614d404ba18b761d652e24de +Author: Krzysztof Parzyszek +Date: Mon May 13 08:42:06 2024 -0500 + + Revert "[flang][OpenMP] Decompose compound constructs, do recursive lowering (#90098)" + + It breaks some builds, e.g. + https://lab.llvm.org/buildbot/#/builders/268/builds/13909 + + This reverts commit ca1bd5995f6ed934f9187305190a5abfac049173. + +commit ca1bd5995f6ed934f9187305190a5abfac049173 +Author: Krzysztof Parzyszek +Date: Mon May 13 08:09:24 2024 -0500 + + [flang][OpenMP] Decompose compound constructs, do recursive lowering (#90098) + + A compound construct with a list of clauses is broken up into individual + leaf/composite constructs. Each such construct has the list of clauses + that apply to it based on the OpenMP spec. + + Each lowering function (i.e. a function that generates MLIR ops) is now + responsible for generating its body as described below. + + Functions that receive AST nodes extract the construct, and the clauses + from the node. They then create a work queue consisting of individual + constructs, and invoke a common dispatch function to process (lower) the + queue. 
+ + The dispatch function examines the current position in the queue, and + invokes the appropriate lowering function. Each lowering function + receives the queue as well, and once it needs to generate its body, it + either invokes the dispatch function on the rest of the queue (if any), + or processes nested evaluations if the work queue is at the end. + +commit a427aa9346295fe7dd3be5955214d28c8be2ad4a +Author: Krzysztof Parzyszek +Date: Fri May 10 15:04:39 2024 -0500 + + [flang][Lower] Treat directives with nested evaluations as constructs (#91614) + + When generating block terminators in `genFIR(Evaluation)`, treat + `Directives` with nested evaluations the same way as `Constructs` to + determine the successor block. + + This fixes https://github.com/llvm/llvm-project/issues/91526 + +commit 435e850ba97ab567a14b6c84d2b27cadb771cb27 +Author: Andrew Gozillon +Date: Mon Feb 12 10:53:28 2024 -0600 + + [Flang][OpenMP][MLIR] Initial derived type member map support + + This patch is one in a series of four patches that seeks to refactor + slightly and extend the current record type map support that was + put in place for Fortran's descriptor types to handle explicit + member mapping for record types at a single level of depth. + + For example, the below case where two members of a Fortran + derived type are mapped explicitly: + + '''' + type :: scalar_and_array + real(4) :: real + integer(4) :: array(10) + integer(4) :: int + end type scalar_and_array + type(scalar_and_array) :: scalar_arr + + !$omp target map(tofrom: scalar_arr%int, scalar_arr%real) + '''' + + Current cases of derived type mapping left for future work are: + > explicit member mapping of nested members (e.g. 
two layers of + record types where we explicitly map a member from the internal + record type) + > Fortran's automagical mapping of all elements and nested elements + of a derived type + > explicit member mapping of a derived type and then constituient members + (redundant in Fortran due to former case but still legal as far as I am aware) + > explicit member mapping of a record type (may be handled reasonably, just + not fully tested in this iteration) + > explicit member mapping for Fortran allocatable types (a variation of nested + record types) + + This patch seeks to support this by extending the Flang-new OpenMP lowering to + support generation of this newly required information, creating the neccessary + parent <-to-> member map_info links, calculating the member indices and + setting if it's a partial map. + + The OMPDescriptorMapInfoGen pass has also been generalized into a map + finalization phase, now named OMPMapInfoFinalization. This pass was extended + to support the insertion of member maps into the BlockArg and MapOperands of + relevant map carrying operations. Similar to the method in which descriptor types + are expanded and constituient members inserted. + + Pull Request: https://github.com/llvm/llvm-project/pull/82853 + +commit 1710c8cf0f8def4984893e9dd646579de5528d95 +Author: Slava Zakharin +Date: Wed May 8 16:48:14 2024 -0700 + + [flang] Lowering changes for assigning dummy_scope to hlfir.declare. (#90989) + + The lowering produces fir.dummy_scope operation if the current + function has dummy arguments. Each hlfir.declare generated + for a dummy argument is then using the result of fir.dummy_scope + as its dummy_scope operand. This is only done for HLFIR. + + I was not able to find a reliable way to identify dummy symbols + in `genDeclareSymbol`, so I added a set of registered dummy symbols + that is alive during the variables instantiation for the current + function. 
+ The set is initialized during the mapping of the dummy + argument symbols to their MLIR values. It is reset right after + all variables are instantiated - this is done to avoid generating + hlfir.declare operations with dummy_scope for the clones of + the dummy symbols (e.g. this happens with OpenMP privatization). + + If this can be done in a cleaner way, please advise. + +commit f72454086af9d3f91a86e10dc1923849c5f670a8 +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue May 7 08:29:21 2024 -0700 + + [flang][cuda] Fix retrieval of nested evaluation in cuf kernel (#91298) + + `loopEval` was declared inside the for loop to iterate over the nested + loops so the same loop control was redeclared for each level of the loop + nest. Make sure we are iterating over all the loops by putting + `loopEval` declaration outside of the for loop. + +commit 6542e5663d1e599d5ed7c961842ce2fcdc9f5090 +Author: Leandro Lupori +Date: Mon May 6 13:14:18 2024 -0300 + + [flang][OpenMP] Move privatizations out of sections (#88191) + + Besides duplicating code, privatizing variables in every section + causes problems when synchronization barriers are used. This + happens because each section is executed by a given thread, which + will cause the program to hang if not all running threads execute + the barrier operation. + + Fixes https://github.com/llvm/llvm-project/issues/72824 + +commit 24f5fc77d43f4ae2dc1cb0c0902c5e22cbadf09e +Author: Kareem Ergawy +Date: Sat May 4 21:20:17 2024 +0200 + + [flang][MLIR][OpenMP] Extend delayed privatization for arrays and characters (#85023) + +commit cda8270981b666c492933a9df1d984d0d0f8433f +Author: vdonaldson <37090318+vdonaldson@users.noreply.github.com> +Date: Fri May 3 09:11:10 2024 -0400 + + [flang] Source code location for IF statements and constructs (#90853) + + Make source code locations for IF statements and IF construct component + statements more accurate. Make similar changes to ASSOCIATE, BLOCK, and + SELECT TYPE construct component statements.
+ +commit 37f6ba4fb2db2c78cda7d0a69cd0a2eff2b924e3 +Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> +Date: Thu May 2 21:58:20 2024 -0700 + + [flang][OpenMP] Fix construct privatization in default clause (#72510) + + Current implementation of default clause privatization incorrectly fails + to privatize in presence of non-OpenMP constructs (i.e. nested + constructs with regions whose symbols need to be privatized in the scope + of the parent OpenMP construct). This patch fixes the same by + considering non-OpenMP constructs separately by collecting symbols of a + nested region if it is a non-OpenMP construct with a region, and + privatizing it in the scope of the parent OpenMP construct. + + Fixes https://github.com/llvm/llvm-project/issues/71914 and + https://github.com/llvm/llvm-project/issues/71915 + +commit 57d0d3b4d638d170035f55d79f0202f1042de345 +Author: Kiran Chandramohan +Date: Wed May 1 12:58:50 2024 +0100 + + [Flang][OpenMP] Handle more character allocatable cases in privatization (#90449) + + Fixes #84732, #81947, #81946 + + Note: This is a fix till we enable delayed privatization. + +commit ecec1311fe0521404a11d6f3b90253259c8c3518 +Author: David Truby +Date: Tue Apr 30 22:38:36 2024 +0100 + + [flang] Remove double pointer indirection for _QQEnvironmentDefaults (#90615) + + A double pointer was being passed to the call to FortranStart rather than just a pointer to the EnvironmentDefaults.list. This now passes `null` directly when there's no EnvironmentDefaults.list and passes the list directly when there is, removing the original global variable which was a pointer to a pointer containing null or the EnvironmentDefaults.list global. + + Fixes #90537 + +commit 33ccd037fcd2b4346065ebcdcbb5d8c1887c2639 +Author: Krzysztof Parzyszek +Date: Tue Apr 30 11:44:55 2024 -0500 + + [flang][OpenMP] Pass symTable to all genXYZ functions, NFC (#90090) + + This will unify the interface a bit more. 
+ +commit f815d1f71f644a6cfd2c22bf7898a1034be235ad +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue Apr 30 08:27:28 2024 -0700 + + [flang][cuda] Fix iv store in cuf kernel (#90551) + + Store of the current induction value to the user IV was not placed + correctly in the body of the cuf kernel. + + @ImanHosseini + +commit 8d5386669ed63548daf1bee415596582d6d78d7d +Author: David Truby +Date: Mon Apr 29 14:16:25 2024 +0100 + + [flang] Generate main only when a Fortran program statement is present (#89938) + + This patch changes the behaviour for flang to only create and link to a + `main` entry point when the Fortran code has a program statement in it. + + This means that flang-new can be used to link even when the program is + a mixed C/Fortran code with `main` present in C and no entry point + present in Fortran. + + This also removes the `-fno-fortran-main` flag as this no longer has any + functionality. + +commit fac349a169976f822fb27f03e623fa0d28aec1f3 +Author: Christian Sigg +Date: Sun Apr 28 22:01:42 2024 +0200 + + Reapply "[mlir] Mark `isa/dyn_cast/cast/...` member functions depreca… (#90406) + + …ted. (#89998)" (#90250) + + This partially reverts commit 7aedd7dc754c74a49fe84ed2640e269c25414087. + + This change removes calls to the deprecated member functions. It does + not mark the functions deprecated yet and does not disable the + deprecation warning in TypeSwitch. This seems to cause problems with + MSVC. + +commit eb5907d06f2ffc3ed7fb8d4772bd3f8eab89c726 +Author: Valentin Clement (バレンタイン クレメン) +Date: Fri Apr 26 13:31:34 2024 -0700 + + [flang][cuda] Avoid to issue data transfer in device context (#90247) + + Data transfer should not be issued in device function. + +commit 7aedd7dc754c74a49fe84ed2640e269c25414087 +Author: dyung +Date: Fri Apr 26 12:09:13 2024 -0700 + + Revert "[mlir] Mark `isa/dyn_cast/cast/...` member functions deprecated. (#89998)" (#90250) + + This reverts commit 950b7ce0b88318f9099e9a7c9817d224ebdc6337. 
+ + This change is causing build failures on a bot + https://lab.llvm.org/buildbot/#/builders/216/builds/38157 + +commit 950b7ce0b88318f9099e9a7c9817d224ebdc6337 +Author: Christian Sigg +Date: Fri Apr 26 16:28:30 2024 +0200 + + [mlir] Mark `isa/dyn_cast/cast/...` member functions deprecated. (#89998) + + See https://mlir.llvm.org/deprecation and + https://discourse.llvm.org/t/preferred-casting-style-going-forward. + +commit 09cdfd68a6cce69cd4c935b8c38ad391cea265ae +Author: Valentin Clement (バレンタイン クレメン) +Date: Thu Apr 25 08:50:52 2024 -0700 + + [flang][cuda] Avoid hlfir.declare verifier error when creating temps (#89984) + + When creating temporaries for implicit transfer, the newly create + hlfir.declare operation was missing some information like the shape and + the verifier was throwing an error. Fix it by making sure we have an + ExtendedValue when calling addSymbol to register the temp. + + ``` + error: loc("cuda-data-transfer.cuf":67:22): 'hlfir.declare' op of array entity + with a raw address base must have a shape operand that is a shape or shapeshift + ``` + + Thanks @jeanPerier for the advice! + + FYI @ImanHosseini + +commit 5cfd5d157cb5aca7ee348bd3e8b4fcd35a5762c8 +Author: Valentin Clement (バレンタイン クレメン) +Date: Thu Apr 25 08:50:34 2024 -0700 + + [flang][cuda] Do not generate data transfer within cuf kernel (#89973) + + CUDA data transfer with intrinsic assignment are not meant to be + generated in cuf kernel. This patch fix this issue. + + @ImanHosseini + +commit ca4dbc271842e8c9b5ed01bd66b687ab703896d0 +Author: Sergio Afonso +Date: Wed Apr 24 14:30:03 2024 +0100 + + [Flang][OpenMP][Lower] Update workshare-loop lowering (5/5) (#89215) + + This patch updates lowering from PFT to MLIR of workshare loops to + follow the loop wrapper approach. Unit tests impacted by this change are + also updated. + + As the last patch of the stack, this should compile and pass unit tests. 
+ +commit cb2639196842630c0a1d5f91e26261f1c42b49fb +Author: Peter Klausler <35819229+klausler@users.noreply.github.com> +Date: Mon Apr 22 15:21:45 2024 -0700 + + [flang] Make proc characterization error conditional for generics (#89429) + + When the characteristics of a procedure depend on a procedure that + hasn't yet been defined, the compiler currently emits an unconditional + error message. This includes the case of a procedure whose + characteristics depend, perhaps indirectly, on itself. However, in the + case where the characteristics of a procedure are needed to resolve a + generic, we should not emit an error for a hitherto undefined procedure + -- either the call will resolve to another specific procedure, in which + case the error is spurious, or it won't, and then an error will issue + anyway. + + Fixes https://github.com/llvm/llvm-project/issues/88677. + +commit 9c9dea943706340f8a45dc74887bf9beddd67810 +Author: Krzysztof Parzyszek +Date: Mon Apr 22 13:04:20 2024 -0500 + + [flang][OpenMP] Concatenate begin and end clauses into single list (#89090) + + This will remove the distinction between begin clauses and end clauses, + and process all of them together. + +commit 9dbf3e2384e450c2b4f282b85b9ec47c65976194 +Author: Sergio Afonso +Date: Fri Apr 19 16:13:10 2024 +0100 + + [Flang][OpenMP] NFC: Simplify handling of insertion points (#89221) + + This patch replaces some `saveInsertionPoint`, `restoreInsertionPoint` + call pairs for an `InsertionGuard` instance where it makes sense within + Flang OpenMP lowering to make further modifications less error-prone. + +commit 992413de99588a60920f934de07d703efb432ade +Author: Krzysztof Parzyszek +Date: Thu Apr 18 12:02:04 2024 -0500 + + [flang][OpenMP] Move clause/object conversion to happen early, in genOMP (#87086) + + This removes the last use of genOmpObjectList2, which has now been + removed. 
+ + --------- + + Co-authored-by: Sergio Afonso + +commit c8dca5bc0733e2fba81008fc33fcad1f45ba666a +Author: Sergio Afonso +Date: Wed Apr 17 12:17:50 2024 +0100 + + [Flang][OpenMP][Lower] Refactor lowering of compound constructs (#87070) + + This patch simplifies the lowering from PFT to MLIR of OpenMP compound + constructs (i.e. combined and composite). + + The new approach consists of iteratively processing the outermost leaf + construct of the given combined construct until it cannot be split + further. Both leaf constructs and composite ones have `gen...()` + functions that are called when appropriate. + + This approach enables treating a leaf construct the same way regardless + of if it appeared as part of a combined construct, and it also enables + the lowering of composite constructs as a single unit. + + Previous corner cases are now handled in a more straightforward way and + comments pointing to the relevant spec section are added. Directive sets + are also completed with missing LOOP related constructs. + +commit 3eb0ba34b0a2a29c2f34ead2b84fdf9b62cb29c1 +Author: Sergio Afonso +Date: Wed Apr 17 11:28:30 2024 +0100 + + [MLIR][Flang][OpenMP] Make omp.simdloop into a loop wrapper (#87365) + + This patch updates the definition of `omp.simdloop` to enforce the + restrictions of a wrapper operation. It has been renamed to `omp.simd`, + to better reflect the naming used in the spec. All uses of "simdloop" in + function names have been updated accordingly. + + Some changes to Flang lowering and OpenMP to LLVM IR translation are + introduced to prevent the introduction of compilation/test failures. The + eventual long term solution might be different. 
+ +commit 4dd5180a2d43b088d7637c30c2654f3c01c46987 +Author: Sergio Afonso +Date: Tue Apr 16 11:08:25 2024 +0100 + + [Flang][OpenMP][Lower] Split MLIR codegen for clauses and constructs (#86963) + + This patch performs several cleanups with the main purpose of + normalizing the code patterns used to trigger codegen for MLIR OpenMP + operations and making the processing of clauses and constructs + independent. The following changes are made: + + - Clean up unused `directive` argument to + `ClauseProcessor::processMap()`. + - Move general helper functions in OpenMP.cpp to the appropriate section + of the file. + - Create `genClauses()` functions containing the clause + processing code specific for the associated OpenMP construct. + - Update `genOp()` functions to call the corresponding + `genClauses()` function. + - Sort calls to `ClauseProcessor::process()` alphabetically, + to avoid inadvertently relying on some arbitrary order. Update some + tests that broke due to the order change. + - Normalize `genOMP()` functions so they all delegate the generation of + MLIR to `genOp()` functions following the same pattern. + - Only process `nowait` clause on `TARGET` constructs if not compiling + for the target device. + + A later patch can move the calls to `genClauses()` out of + `genOp()` functions and passing completed clause structures + instead, in preparation to supporting composite constructs. That will + make it possible to reuse clause processing for a given leaf construct + when appearing alone or in a combined or composite construct, while + controlling where the associated code is produced. + +commit 76782e28869abf93716f72f195d55c28eaf263ed +Author: Kiran Chandramohan +Date: Tue Apr 16 10:29:26 2024 +0100 + + [Flang][OpenMP] NFC: Remove old reduction lowering code (#88798) + + The old code was replaced by + https://github.com/llvm/llvm-project/pull/80019. 
+ +commit 78eac466095c205988ef1e2380033d042a169a3b +Author: Sergio Afonso +Date: Fri Apr 12 12:42:41 2024 +0100 + + [Flang][OpenMP][Lower] Use clause operand structures (#86802) + + This patch updates Flang lowering to use the new set of OpenMP clause + operand structures and their groupings into directive-specific sets of + clause operands. + + It simplifies the passing of information from the clause processor and + the creation of operations. + + The `DataSharingProcessor` is slightly modified to not hold delayed + privatization state. Instead, optional arguments are added to + `processStep1` which are only passed when delayed privatization is used. + This enables using the clause operand structure for `private` and + removes the need for the ad-hoc `DelayedPrivatizationInfo` structure. + + The processing of the `schedule` clause is updated to process the + `chunk` modifier rather than requiring two separate calls to the + `ClauseProcessor`. + + Lowering of a block-associated `ordered` construct is updated to emit a + TODO error if the `simd` clause is specified, since it is not currently + supported by the `ClauseProcessor` or later compilation stages. + + Removed processing of `schedule` from `omp.simdloop`, as it doesn't + apply to `simd` constructs. + +commit 298ea9bfd50ca41c77e45065700df06adb6264ae +Author: Raghu Maddhipatla <7686592+raghavendhra@users.noreply.github.com> +Date: Thu Apr 11 10:26:54 2024 -0500 + + [Flang] [OpenMP] [MLIR] [Lowering] Add lowering support for IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses on OMP TARGET directive. (#88206) + + Added lowering support for IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses for + OMP TARGET directive and added related tests for these changes. + + IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses apply to OMP TARGET directive + OpenMP spec states + + The **is_device_ptr** clause indicates that its list items are device + pointers. 
+ + The **has_device_addr** clause indicates that its list items already + have device addresses and therefore they may be directly accessed from a + target device. + + Whereas USE_DEVICE_PTR and USE_DEVICE_ADDR clauses apply to OMP TARGET + DATA directive and OpenMP spec for them states + + Each list item in the **use_device_ptr** clause results in a new list + item that is a device pointer that refers to a device address + + Each list item in a **use_device_addr** clause that is present in the + device data environment is treated as if it is implicitly mapped by a + map clause on the construct with a map-type of alloc + + Fixed build error caused by Squash merge which needs rebase + +commit eec41d2f8d81b546d7b97648cca6b2d656104bd3 +Author: Raghu Maddhipatla <7686592+raghavendhra@users.noreply.github.com> +Date: Tue Apr 9 16:18:56 2024 -0500 + + Revert "[Flang] [OpenMP] [Semantics] [MLIR] [Lowering] Add lowering support for IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses on OMP TARGET directive." (#88198) + + Reverts llvm/llvm-project#74187 + +commit 9d9560facb5597e0232ab15716a7915a33d4f0a6 +Author: Raghu Maddhipatla <7686592+raghavendhra@users.noreply.github.com> +Date: Tue Apr 9 14:59:20 2024 -0500 + + [Flang] [OpenMP] [Semantics] [MLIR] [Lowering] Add lowering support for IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses on OMP TARGET directive. (#74187) + + Added lowering support for IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses for + OMP TARGET directive and added related tests for these changes. 
+ + IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses apply to OMP TARGET directive + OpenMP spec states + + `The **is_device_ptr** clause indicates that its list items are device + pointers.` + + `The **has_device_addr** clause indicates that its list items already + have device addresses and therefore they may be directly accessed from a + target device.` + + Whereas USE_DEVICE_PTR and USE_DEVICE_ADDR clauses apply to OMP TARGET + DATA directive and OpenMP spec for them states + + `Each list item in the **use_device_ptr** clause results in a new list + item that is a device pointer that refers to a device address` + + `Each list item in a **use_device_addr** clause that is present in the + device data environment is treated as if it is implicitly mapped by a + map clause on the construct with a map-type of alloc` + +commit 3f2f700633bbcc0cb5ada17f5736b43f9c1e426e +Author: Jie Fu +Date: Sat Apr 6 07:31:53 2024 +0800 + + [flang] Fix -Wunused-but-set-variable in Bridge.cpp (NFC) + + llvm-project/flang/lib/Lower/Bridge.cpp:3775:14: + error: variable 'nbDeviceResidentObject' set but not used [-Werror,-Wunused-but-set-variable] + unsigned nbDeviceResidentObject = 0; + ^ + 1 error generated. + +commit 953aa102a90099ae655eaa4645dd8d15c95ea86a +Author: Valentin Clement (バレンタイン クレメン) +Date: Fri Apr 5 09:11:37 2024 -0700 + + [flang][cuda] Lower device to host and device to device transfer (#87387) + + Add more support for CUDA data transfer in assignment. This patch adds + device to device and device to host support. If device symbols are + present on the rhs, some implicit data transfer are initiated. A + temporary is created and the data are transferred to the host. The + expression is evaluated on the host and the assignment is done. 
+ +commit a4798bb0b67533b37d6b34fd5292714aac3b17d9 +Author: jeanPerier +Date: Tue Apr 2 14:29:29 2024 +0200 + + [flang][NFC] use mlir::SymbolTable in lowering (#86673) + + Whenever lowering is checking if a function or global already exists in + the mlir::Module, it was doing module->lookup. + + On big programs (~5000 globals and functions), this causes important + slowdowns because these lookups are linear. Use mlir::SymbolTable to + speed-up these lookups. The SymbolTable has to be created from the + ModuleOp and maintained in sync. It is therefore placed in the + converter, and FirOPBuilders can take a pointer to it to speed-up the + lookups. + + This patch does not bring mlir::SymbolTable to FIR/HLFIR passes, but + some passes creating a lot of runtime calls could benefit from it too. + More analysis will be needed. + + As an example of the speed-ups, this patch speeds-up compilation of + Whizard compare_amplitude_UFO.F90 from 5 mins to 2 mins on my machine + (there is still room for speed-ups). + +commit 79199753fd6c39aac881b9556614c5db2775dc85 +Author: Krzysztof Parzyszek +Date: Thu Mar 28 07:46:01 2024 -0500 + + [flang][OpenMP] Make several function local to OpenMP.cpp, NFC (#86726) + + There were several functions, mostly reduction-related, that were only + called from OpenMP.cpp. Remove them from OpenMP.h, and make them local + in OpenMP.cpp: + - genOpenMPReduction + - findReductionChain + - getConvertFromReductionOp + - updateReduction + - removeStoreOp + + Also, move the function bodies out of the "public" section. + +commit 4d177435bae03551245ffdc4dfcee5345323121d +Author: Krzysztof Parzyszek +Date: Wed Mar 27 11:37:09 2024 -0500 + + [flang][OpenMP] Rename makeList overloads to make{Objects,Clauses}, NFC (#86725) + + Reserve `makeList` to create a list given an explicit converter + function. 
+ +commit 148a55795de7ac465a8e494d5d382e100da643f6 +Author: Krzysztof Parzyszek +Date: Tue Mar 26 13:54:26 2024 -0500 + + [flang][OpenMP] Make OpenMP clause representation language-agnostic (#86289) + + The clause templates defined in ClauseT.h were originally based on + flang's parse tree nodes. Since those representations are going to be + reused for clang (together with the clause splitting code), it makes + sense to separate them from flang, and instead have them based on the + actual OpenMP spec (v5.2). + + The member names in the templates follow the naming presented in the + spec, and the representation (e.g. members) is derived from the clause + definitions as described in the spec. + + Since the representations of some clauses have changed (while preserving + the information), the current code using the clauses (especially the + code converting parser::OmpClause to omp::Clause) needs to be adjusted. + + This patch does not make any functional changes. + +commit 4998587e6f5f66d464ac22ad4c11fe9afd2d56ab +Author: Daniel Chen +Date: Tue Mar 26 11:29:24 2024 -0400 + + [Flang] Support for passing procedure pointer, reference to a function that returns a procedure pointer to structure constructor. (#86533) + + This PR fixes `not yet implemented: procedure pointer component in + structure constructor` as shown in the following test case. 
+ + ``` + MODULE M + TYPE :: DT + PROCEDURE(Fun), POINTER, NOPASS :: pp1 + END TYPE + + CONTAINS + + INTEGER FUNCTION Fun(Arg) + INTEGER :: Arg + Fun = Arg + END FUNCTION + + END MODULE + + PROGRAM MAIN + USE M + IMPLICIT NONE + TYPE (DT) :: v2 + PROCEDURE(FUN), POINTER :: pp2 + v2 = DT(pp2) + v2 = DT(bar()) + CONTAINS + FUNCTION BAR() RESULT(res) + PROCEDURE(FUN), POINTER :: res + END + END + ``` + +commit 4e6745cc4db309c0e1b5e41d4598f67763f4c096 +Author: Valentin Clement (バレンタイン クレメン) +Date: Mon Mar 25 11:53:39 2024 -0700 + + [flang][cuda] Lower simple host to device data transfer (#85960) + + In CUDA Fortran data transfer can be done via assignment statements + between host and device variables. + + This patch introduces a `fir.cuda_data_transfer` operation that + materializes the data transfer between two memory references. + + Simple transfers not involving descriptors from host to device are also + lowered in this patch. When the rhs is an expression that requires an + evaluation, a temporary is created. The evaluation is done on the host + and then the transfer is initiated. + + Implicit transfers when device symbols are present on the rhs are not part + of this patch. Transfers from device to host are not part of this patch. + +commit 564035edb0e67a177fd911fc779cd64300a9b5ef +Author: Krzysztof Parzyszek +Date: Mon Mar 25 09:54:04 2024 -0500 + + [flang][OpenMP] Organize `genOMP` functions in OpenMP.cpp, NFC (#86309) + + Put all of the genOMP functions together, organize them in two groups: + for declarative constructs and for other (executable) constructs. + + Replace visit functions for OpenMPDeclarativeConstruct and + OpenMPConstruct from listing individual visitors for each variant + alternative to using a single generic visitor. Essentially, going from + ``` + std::visit( + [](foo x) { genOMP(foo); } + [](bar x) { TODO } + [](baz x) { genOMP(baz); } + ) + ``` + to + ``` + void genOMP(bar x) { // Separate visitor for an unhandled case + TODO + } + + [...] 
+ std::visit([&](auto &&s) { genOMP(s); }) // generic + ``` + + This doesn't change any functionality, just reorganizes the functions a + bit. The intent here is to improve the readability of this file. + +commit de7a50fb88faa1dafee33f10149561936214062b +Author: jeanPerier +Date: Fri Mar 22 11:13:04 2024 +0100 + + [flang] Fix lowering of host associated cray pointee symbols (#86121) + + Cray pointee symbols can be host associated from a module or host + procedure while the related cray pointer is not explicitly associated. + This caused the "not yet implemented: lowering symbol to HLFIR" to fire + when lowering a reference to the cray pointee and fetching the cray + pointer. + + This patch: + - Ensures cray pointers are always instantiated when instantiating a + cray pointee. + - Fix internal procedure lowering to deal with cray pointee host + association like it does for pointers (the lowering strategy for cray + pointee is to create a pointer that is updated with the cray pointer + value before being fetched). + + This should fix the bug reported in + https://github.com/llvm/llvm-project/issues/85420. + +commit 2ab106cbd428984df3dda2f6983d5f956917cb69 +Author: Krzysztof Parzyszek +Date: Thu Mar 21 15:12:43 2024 -0500 + + [flang][OpenMP] Convert processTODO and remove unused objects (#81627) + + Remove `ClauseIterator2` and `clauses2` from ClauseProcessor. + + [Clause representation 5/6] + +commit 734026347cca85cf0e242ef5f04896f55e0ac113 +Author: Sergio Afonso +Date: Thu Mar 21 12:25:48 2024 +0000 + + Reapply "[Flang][OpenMP][Lower] NFC: Move clause processing helpers into the ClauseProcessor (#85258)" (#85807) + + This patch contains slight modifications to the reverted PR #85258 to + avoid issues with constructs containing multiple reduction clauses, + uncovered by a test on the gfortran testsuite. + + This reverts commit 9f80444c2e669237a5c92013f1a42b91b5609012. 
+ +commit 84115494d6475e1aea3cdd1163d3a88243b75f36 +Author: Krzysztof Parzyszek +Date: Wed Mar 20 15:00:29 2024 -0500 + + [flang][Lower] Convert OMP Map and related functions to evaluate::Expr (#81626) + + The related functions are `gatherDataOperandAddrAndBounds` and + `genBoundsOps`. The former is used in OpenACC as well, and it was + updated to pass evaluate::Expr instead of parser objects. + + The difference in the test case comes from unfolded conversions of index + expressions, which are explicitly of type integer(kind=8). + + Delete now unused `findRepeatableClause2` and `findClause2`. + + Add `AsGenericExpr` that takes std::optional. It already returns + optional Expr. Making it accept an optional Expr as input would reduce + the number of necessary checks when handling frequent optional values in + evaluator. + + [Clause representation 4/6] + +commit d84252e064b3f35aa879c10e207f77e931f351d9 +Author: Sergio Afonso +Date: Wed Mar 20 11:19:38 2024 +0000 + + [MLIR][OpenMP] NFC: Uniformize OpenMP ops names (#85393) + + This patch proposes the renaming of certain OpenMP dialect operations with the + goal of improving readability and following a uniform naming convention for + MLIR operations and associated classes. 
In particular, the following operations + are renamed: + + - `omp.map_info` -> `omp.map.info` + - `omp.target_update_data` -> `omp.target_update` + - `omp.ordered_region` -> `omp.ordered.region` + - `omp.cancellationpoint` -> `omp.cancellation_point` + - `omp.bounds` -> `omp.map.bounds` + - `omp.reduction.declare` -> `omp.declare_reduction` + + Also, the following MLIR operation classes have been renamed: + + - `omp::TaskLoopOp` -> `omp::TaskloopOp` + - `omp::TaskGroupOp` -> `omp::TaskgroupOp` + - `omp::DataBoundsOp` -> `omp::MapBoundsOp` + - `omp::DataOp` -> `omp::TargetDataOp` + - `omp::EnterDataOp` -> `omp::TargetEnterDataOp` + - `omp::ExitDataOp` -> `omp::TargetExitDataOp` + - `omp::UpdateDataOp` -> `omp::TargetUpdateOp` + - `omp::ReductionDeclareOp` -> `omp::DeclareReductionOp` + - `omp::WsLoopOp` -> `omp::WsloopOp` + +commit 9f80444c2e669237a5c92013f1a42b91b5609012 +Author: Sergio Afonso +Date: Tue Mar 19 13:25:33 2024 +0000 + + Revert "[Flang][OpenMP][Lower] NFC: Move clause processing helpers into the ClauseProcessor (#85258)" + + Reverting due to failing gfortran test. + + This reverts commit 2f2f16f32bb2a6c250b19adbc229d9dc3b38640c. + +commit 2f2f16f32bb2a6c250b19adbc229d9dc3b38640c +Author: Sergio Afonso +Date: Tue Mar 19 11:49:45 2024 +0000 + + [Flang][OpenMP][Lower] NFC: Move clause processing helpers into the ClauseProcessor (#85258) + + This patch moves some code in PFT to MLIR OpenMP lowering to the + `ClauseProcessor` class. This is so that some behavior that is related + to certain clauses stays within the `ClauseProcessor` and it's not the + caller the one responsible for always doing this when the clause is + present. + +commit d671ebe46ce6bb542ab81ea120751c985f3fe4a3 +Author: Sergio Afonso +Date: Tue Mar 19 10:45:59 2024 +0000 + + [Flang][Lower] NFC: Replace SmallVector with more suitable alternatives (#85227) + + In this patch some uses of `llvm::SmallVector` in Flang's lowering to + MLIR are replaced by other types (i.e. 
`llvm::ArrayRef` and + `llvm::SmallVectorImpl`) which are intended for these uses. This + generally prevents relying on always passing small vectors with a + particular number of elements in the stack. + +commit f6a2a55ba1fe1a4b720b8760704785d12137b35e +Author: Valentin Clement (バレンタイン クレメン) +Date: Mon Mar 18 19:46:11 2024 -0700 + + [flang][cuda] Handle lowering of stars in cuf kernel launch parameters (#85695) + + Parsing of the cuf kernel loop directive has been updated to handle + variants with the * syntax. This patch updates the lowering to make use + of them. + + - If the grid or block syntax uses only stars then the operation's + variadic operand remains empty. + - If there are values and stars, then the stars are represented as a zero + constant value. + +commit 8a6a0f1954937341abd501529f3d7454937110a5 +Author: Valentin Clement (バレンタイン クレメン) +Date: Mon Mar 18 17:11:04 2024 -0700 + + [flang][cuda] Add proper TODO for cuda fortran assignment (#85705) + + Data transfer between host and device can be done with assignment + statements in CUDA Fortran. This is currently not lowered so adding a + proper TODO. + + + https://docs.nvidia.com/hpc-sdk/archive/24.3/compilers/cuda-fortran-prog-guide/index.html#cfref-data-trans-assgn-statemts + +commit 87cee71b3738547465481740fcbde7d73283678f +Author: Kareem Ergawy +Date: Mon Mar 18 10:44:44 2024 +0100 + + [flang][MLIR][OpenMP] Extend delayed privatization for scalar allocatables and pointers (#84740) + + One more step in extending support for delayed privatization. This diff + adds support for scalar allocatables and pointers. 
+ +commit 037a32a9a73286cf6e1bf439c61b03767658b564 +Author: Krzysztof Parzyszek +Date: Fri Mar 15 16:42:06 2024 -0500 + + [flang][OpenMP] Convert DataSharingProcessor to omp::Clause (#81629) + + [Clause representation 6/6] + +commit 60fa2b0670b874b702ddb9f81d098af692ea6875 +Author: Peter Klausler <35819229+klausler@users.noreply.github.com> +Date: Fri Mar 15 13:57:42 2024 -0700 + + [flang] Parse !$CUF KERNEL DO <<< (*) (#85338) + + Accept and represent asterisks within the parenthesized grid and block + specification lists. + +commit 63e70c05537c54edae975c8b5449ff87444abec2 +Author: Krzysztof Parzyszek +Date: Fri Mar 15 07:04:42 2024 -0500 + + [flang][OpenMP] Convert repeatable clauses (except Map) in ClauseProc… (#81623) + + …essor + + Rename `findRepeatableClause` to `findRepeatableClause2`, and make the + new `findRepeatableClause` operate on new `omp::Clause` objects. + + Leave `Map` unchanged, because it will require more changes for it to + work. + + [Clause representation 3/6] + +commit 096ee4e16fd62cd578d20ec4e8ad4756f4e369ee +Author: agozillon +Date: Wed Mar 13 16:18:21 2024 +0100 + + [Flang][OpenMP] Implement "promotion" of use_device_ptr non-cptr arguments to use_device_addr (#82834) + + This effectively implements some now deprecated OpenMP functionality + that some applications (most notably at the moment GenASiS) + unfortunately depend on (deprecated in specification version 5.2): + + "If a list item in a use_device_ptr clause is not of type C_PTR, the + behavior is as if the list item appeared in a use_device_addr clause. + Support for such list items in a use_device_ptr clause is deprecated." 
+ + This PR downgrades the hard-error to a deprecated warning and "promotes" + the above cases by simply moving the offending operands from the + use_device_ptr value list to the back of the use_device_addr list (and + moves the related symbols, locs and types that form the BlockArgs + correspondingly) and then the generation of the target data construct + proceeds as normal. + +commit f46f5a01f4d5a7dcaf4a8fde5fc44eafdd9dbf27 +Author: Tom Eccles +Date: Wed Mar 13 14:51:09 2024 +0000 + + [flang][OpenMP][OMPIRBuilder][mlir] Optionally pass reduction vars by ref (#84304) + + Previously reduction variables were always passed by value into and out + of the initialization and combiner regions of the OpenMP reduction + declare operation. + + This worked well for reductions of primitive types (and might perform + better than passing by reference). But passing by reference will be + useful for array and derived type reductions (e.g. to move allocation + inside of the init region). + + Passing reductions by reference requires different LLVM-IR generation + when lowering from MLIR because some of the loads/stores/allocations + will now be moved inside of the init and combiner regions. This + alternate code generation is requested using a new attribute to + omp.wsloop and omp.parallel. + + Existing lowerings from mlir are unaffected (these will continue to use + the by-value argument passing). + + Flang will continue to use by-value argument passing for trivial types + unless a (hidden) command line argument is supplied. Non-trivial types + will always use the by-ref lowering. + + Array reductions are not ready yet (but are coming very soon). In the + meantime, this is tested by forcing existing reductions to use by-ref. 
+ + Commit series for by-ref OpenMP reductions 3/3 + + --------- + + Co-authored-by: Mats Petersson + +commit a4aac22683a44264bb3883242b1c6b711f534e8b +Author: harishch4 +Date: Tue Mar 12 20:04:35 2024 +0530 + + [Flang][OpenMp] Fix to threadprivate not working with host-association. (#74966) + + This patch considers host-associated variables to generate threadprivate + Ops. + + Fixes: #60763 #84561 + +commit 3b30559c088d679ca8fe491158e6c32db630f223 +Author: Kareem Ergawy +Date: Mon Mar 11 10:38:28 2024 +0100 + + [flang][OpenMP] Only use HLFIR base in privatization logic (#84123) + + Modifies the privatization logic so that the emitted code only uses the + HLFIR base (i.e. SSA value `#0` returned from `hlfir.declare`). Before + that, the emitted privatization logic was a mix of using `#0` and `#1`, + which led to some difficulties trying to move to delayed privatization + (see the discussion on #84033). + +commit c03fd37d9b61bc6063e4d6e983846f877e83ac67 +Author: Anchu Rajendran S +Date: Thu Mar 7 08:23:58 2024 -0800 + + [flang] Changes to map variables in link clause of declare target (#83643) + + As per the OpenMP standard, "If a variable appears in a link clause on a + declare target directive that does not have a device_type clause with + the nohost device-type-description then it is treated as if it had + appeared in a map clause with a map-type of tofrom" is an implicit + mapping rule. Before this change, such variables were mapped as `to` by + default. 
+ +commit afb05cd6469215232bd83e7cfbe59d2e1852567f +Author: agozillon +Date: Tue Mar 5 17:27:16 2024 +0100 + + [Flang][MLIR][OpenMP] Create a deferred declare target marking process for Bridge.cpp (#78502) + + This patch seeks to create a process that happens on module finalization + for OpenMP, in which a list of operations that had declare target + directives applied to them and were not generated at the time of + processing the original declare target directive are re-checked to apply + the appropriate declare target semantics. + + This works by maintaining a vector of declare target related data inside + of the FIR converter, in this case the symbol and the two relevant + unsigned integers representing the enumerators. This vector is added to + via a new function called from Bridge.cpp, insertDeferredDeclareTargets, + which happens prior to the processing of the directive (similarly to + getDeclareTargetFunctionDevice currently for requires), it effectively + checks if the Operation the declare target directive is applied to + currently exists, if it doesn't it appends to the vector. This is a + separate function to the processing of the declare target via the + overloaded genOMP as we unfortunately do not have access to the list + without passing it through every call, as the AbstractConverter we pass + will not allow access to it (I've seen no other cases of casting it to a + FirConverter, so I opted to not do that). + + The list is then processed at the end of the module in the + finalizeOpenMPLowering function in Bridge by calling a new function + markDelayedDeclareTargetFunctions which marks the latently generated + operations. In certain cases, some still will not be generated, e.g. if + an interface is defined, marked as declare target, but has no definition + or usage in the module then it will not be emitted to the module, so due + to these cases we must silently ignore when an operation has not been + found via its symbol. 
+ + The main use-case for this (although, I imagine there are others) is for + processing interfaces that have been declared in a module with a declare + target directive but do not have their implementation defined in the + same module. For example, inside of a separate C++ module that will be + linked in. In cases where the interface is called inside of a target + region it'll be marked as used on device appropriately (although, + realistically a user should explicitly mark it to match the + corresponding definition), however, in cases where it's used in a + non-clear manner through something like a function pointer passed to an + external call we require this explicit marking, which this patch adds + support for (currently will cause the compiler to crash). + + This patch also adds documentation on the declare target process and + mechanisms within the compiler currently. + +commit b585c43dccb2c608f698419a9c8d7645d3120fdb +Author: Kiran Chandramohan +Date: Tue Mar 5 10:28:36 2024 +0000 + + [Flang][OpenMP] : Add a temporary lowering for workshare directive (#78268) + + As a temporary solution, lower workshare to the single directive + +commit 5225901ecd53ba1e3f1519f3edea7d1aec15502d +Author: Peter Klausler <35819229+klausler@users.noreply.github.com> +Date: Thu Feb 29 13:02:39 2024 -0800 + + [flang] Add [[maybe_unused]] to fix -Werror build (#83456) + + Add the [[maybe_unused]] attribute to a variable in + lib/Lower/OpenMP/OpenMP.cpp to avoid a (possibly bogus) unused variable + warning when building with GCC 9.3.0. + +commit 06f775a82f6f562f8de75053f62c9c0dbeaa67d2 +Author: jeanPerier +Date: Wed Feb 28 14:30:29 2024 +0100 + + [flang] Give internal linkage to internal procedures (#81929) + + Internal procedures cannot be called directly from outside the host + procedure, so there is no point giving them external linkage. The only + reason flang did is because it is the default in MLIR. 
+ + Giving external linkage to them: + - prevents deleting them when not used/inlined by LLVM + - causes bugs with shared libraries (at least on linux x86-64) because + the call to the internal function could lead to a dynamic loader call + that would overwrite r10 register (the static chain pointer) due to + system calls and did not restore (it seems it does not expect r10 to be + used for PLT calls). + + This patch gives internal linkage to internal procedures: + + Note: the llvm.linkage attribute name cannot be obtained via a + getLinkageAttrName since it is not the same name as the one used in the + LLVM dialect. It is just a placeholder defined in + mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp until the func dialect + gets a real linkage model. So simply avoid hard coding it too many times + in lowering. + +commit 26b8be201e2d15867bb327a8008fffb3e34d42a5 +Author: Kareem Ergawy +Date: Wed Feb 28 10:15:57 2024 +0100 + + [flang][OpenMP][MLIR] Basic support for delayed privatization code-gen (#81833) + + Adds basic support for emitting delayed privatizers from flang. So far, + only types of symbols are supported (i.e. scalars), support for more + complicated types will be added later. This also makes sure that + reduction and delayed privatization work properly together by merging + the + body-gen callbacks for both in case both clauses are present on the + parallel construct. + +commit b3189b13b274a3411f939574aa573a7656bf372b +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue Feb 27 11:23:17 2024 -0800 + + [flang][cuda] CUF kernel loop directive (#82836) + + This patch introduces a new operation to represent the CUDA Fortran + kernel loop directive. This operation is modeled as a LoopLikeOp + operation in a similar way to acc.loop. + + The CUFKernelDoConstruct parse tree node is also placed correctly in the + PFTBuilder to be available in PFT evaluations. + + Lowering from the flang parse-tree to MLIR is also done. 
+ +commit e50a231dcdd6aafa922b177b4fc4629bb7a10a79 +Author: Leandro Lupori +Date: Wed Feb 21 14:51:37 2024 -0300 + + [flang][OpenMP] Add support for copyprivate (#80485) + + Add initial handling of OpenMP copyprivate clause in Flang. + + When lowering copyprivate, Flang generates the copy function + needed by each variable and builds the appropriate + omp.single's CopyPrivateVarList. + + This is patch 3 of 4, to add support for COPYPRIVATE in Flang. + Original PR: https://github.com/llvm/llvm-project/pull/73128 + +commit 58f45d909d2a1565128846e423b480808736f214 +Author: Pranav Bhandarkar +Date: Wed Feb 21 11:28:25 2024 -0600 + + [flang][openmp] - depend clause support in target, target enter/update/exit data constructs (#81610) + + This patch adds support in flang for the depend clause in target and + target enter/update/exit constructs. Previously, the following line in a + fortran program would have resulted in the error shown below it. + + !$omp target map(to:a) depend(in:a) + + + "not yet implemented: Unhandled clause DEPEND in TARGET construct" + +commit 4d4af15c3fb671ed9f7eef9f29ebd6fde15618df +Author: Kareem Ergawy +Date: Wed Feb 21 15:55:42 2024 +0100 + + [NFC][flang][OpenMP] Split `DataSharing` and `Clause` processors (#81973) + + This started as an experiment to reduce the compilation time of + iterating over `Lower/OpenMP.cpp` a bit since it is too slow at the + moment. Trying to do that, I split the `DataSharingProcessor`, + `ReductionProcessor`, and `ClauseProcessor` into their own files and + extracted some shared code into a util file. All of these new `.h/.cpp` + files as well as `OpenMP.cpp` are now under a `Lower/OpenMP/` directory. + + This resulted is a slightly better organization of the OpenMP lowering + code and hence opening this NFC. 
+ + As for the compilation time, this unfortunately does not affect it much + (it shaves off a few seconds of `OpenMP.cpp` compilation) since from + what I learned the bottleneck is in `DirectivesCommon.h` and + `PFTBuilder.h` which both consume a lot of time in template + instantiation it seems. + +commit e769fb8699e3fa8e40623764f7713bfc783b0330 +Author: jeanPerier +Date: Thu Feb 15 09:06:42 2024 +0100 + + [flang] prevent legacy lowering from being called in pointer assignment (#81750) + + When doing a pointer assignment with an RHS that is an array section, + the code fell in the legacy lowering code even with HLFIR enabled. + Escape this old code when HLFIR is on. + + Should fix #80884. + +commit d1f510cca8e966bd1742bf17256bfec99dcdf229 +Author: Mats Petersson +Date: Tue Feb 13 14:32:26 2024 +0000 + + Fix warning by removing unused variable (#81604) + + Apparently, some compilers [correctly] warn that the variable that was + created prior to this change is unused. + + This removes the variable. + +commit 1af073a11cb2ae5a52205e66f33d0ec9bbcbb5e0 +Author: Krzysztof Parzyszek +Date: Mon Feb 12 19:15:55 2024 -0600 + + [flang][OpenMP] Pass semantics context to all generating functions in… (#81269) + + … lower + + The convention is to pass it after "symTable" if present, otherwise + after "converter": + - converter, symTable, semaCtx + - converter, semaCtx + + This makes the interfaces more uniform---some of these functions were + already taking the semantics context, while others were not. + + The context will be used in future patches. 
+ +commit b2b3a5248540320e74347fcdaffbd148d1e9d494 +Author: Mats Petersson +Date: Fri Feb 9 18:05:51 2024 +0000 + + Skip compiler directives between OMP PARALLEL DO and the loop (#81021) + + This fixes a compilation error when code like this is presented to the + compiler: + + !$OMP PARALLEL DO + !DIR$ VECTOR ALIGNED + DO 20 i=1,N + a = a + 0.5 + 20 CONTINUE + + The directive itself is later ignored (with a warning that this is + happening), but because the compiler already errored out before that + point, it completely fails to compile this code. Other compilers accept + the code without complaints. + +commit b081e9d4cafe2563c513ed7b5ae3ced6d177b657 +Author: Daniel Chen +Date: Fri Feb 9 10:56:57 2024 -0500 + + [Flang] Fix NULLIFY statement that returns too early for multiple procedure pointer objects. (#81164) + + The current code that handles NULLIFY statement for procedure pointer + returns after the 1st object. + This PR is to remove the `return` so it can nullify multiple procedure + pointer objects. + +commit 0a45d172d3229074d414e1942d6bafa2b4ae9126 +Author: jeanPerier +Date: Mon Feb 5 10:12:33 2024 +0100 + + [flang] Do not instantiate runtime info globals in functions (#80447) + + Runtime globals are compiler generated globals injected in user scopes. + They are never referred to directly in lowering code, we only need the + fir.global for them. Yet lowering was creating hlfir.declare for them in + module procedures. In modern fortran apps, this blows up the generated + IR for nothing (Types with dozens of components, type bound procedures + and parents can create in the order of 10 000 runtime info globals to + describe them, if there are 100 module procedures, that is a few + million operations generated and processed in each pass for nothing). + +commit bd8bec27e25022b07ec7044654cd6a1efcd9704f +Author: Daniel Chen +Date: Wed Jan 31 11:24:17 2024 -0500 + + [Flang] Support NULL(procptr): null intrinsic that has procedure pointer argument. 
(#80072) + + This PR adds support for NULL intrinsic to have a procedure pointer + argument. + +commit 837bff11cb7d31f40805c73d4f539960a77eda33 +Author: Sergio Afonso +Date: Tue Jan 30 13:45:56 2024 +0000 + + [Flang][Lower] Attach target_cpu and target_features attributes to MLIR functions (#78289) + + This patch forwards the target CPU and features information from the + Flang frontend to MLIR func.func operation attributes, which are later + used to populate the target_cpu and target_features llvm.func + attributes. + + This is achieved in two stages: + + 1. Introduce the `fir.target_cpu` and `fir.target_features` module + attributes with information from the target machine immediately after + the initial creation of the MLIR module in the lowering bridge. + + 2. Update the target rewrite flang pass to get this information from the + module and pass it along to all func.func MLIR operations, respectively + as attributes named `target_cpu` and `target_features`. These attributes + will be automatically picked up during Func to LLVM dialect lowering and + used to initialize the corresponding llvm.func named attributes. + + The target rewrite and FIR to LLVM lowering passes are updated with the + ability to override these module attributes, and the `CodeGenSpecifics` + optimizer class is augmented to make this information available to + target-specific MLIR transformations. + + This completes a full flow by which target CPU and features make it all + the way from compiler options to LLVM IR function attributes. + +commit 181eab27d244b9a9eb32d6716f9c38f7f3723356 +Author: jeanPerier +Date: Mon Jan 29 18:28:56 2024 +0100 + + [flang] Set KIND in compiler generated COUNT for SIZE(PACK) (#79801) + + Compiler was rewriting SIZE(PACK(x, MASK)) to COUNT(MASK). 
It was + wrapping the COUNT call without a KIND argument (leading to INTEGER(4) + result in the characteristics) in an Expr (implying + INTEGER(8) result), this led to inconsistencies that later hit verifier + errors in lowering. + + Set the KIND argument to the KIND of ExtentType to ensure the built + expression is consistent. + + This requires giving access to some safe place where the "kind" name can + be saved and turned into a CharBlock (count has a DIM argument that + requires using the KIND keyword here). For the FoldingContext that belongs + to SemanticsContext, this is the same string set as the one used by + SemanticsContext for similar purposes. + +commit 5062a178bf9dd46008b8f7a182facb6152c46889 +Author: Valentin Clement (バレンタイン クレメン) +Date: Mon Jan 22 10:31:37 2024 -0800 + + [flang][openacc] Lower loop directive to the new acc.loop op design (#65417) + + acc.loop was redesigned in https://reviews.llvm.org/D159229. This patch + updates the lowering to match the new op. + + DO CONCURRENT construct will be added in a follow up patch. + + Note that the pre-commit ci will fail until D159229 is merged. + + Depends on #67355 + +commit c5a9e354379d29ee763e9982faf57398789c8d5b +Author: Krzysztof Parzyszek +Date: Mon Jan 15 08:01:41 2024 -0600 + + [Flang][OpenMP] Push genEval calls to individual operations, NFC (#77758) + + Introduce `genNestedEvaluations` that will lower all evaluations nested + in the given, accounting for a potential COLLAPSE directive. + + Recursive lowering [2/5] + +commit a2d7af757bc33dc91f2e038742915a146cfb0c13 +Author: Katherine Rasmussen +Date: Tue Jan 2 10:40:47 2024 -0800 + + [flang] Add notify-type and notify-wait-stmt (#76594) + + Add `notify-type` to `iso_fortran_env` module. Add `notify-wait-stmt` to + the parser and add checks for constraints on the statement, `C1177` and + `C1178`, from the Fortran 2023 standard. Add three semantics tests for + `notify-wait-stmt`. 
+ +commit c373f58134997a6d037f0143f13f97451278700f +Author: jeanPerier +Date: Tue Dec 19 17:17:09 2023 +0100 + + [flang] Lower procedure pointer components (#75453) + + Lower procedure pointer components, except in the context of structure + constructor (left TODO). + + Procedure pointer components lowering shares most of the lowering logic + of procedure pointers with the following particularities: + - They are components, so an hlfir.designate must be generated to + retrieve the procedure pointer address from its derived type base. + - They may have a PASS argument. While there is no dispatching as with + type bound procedure, special care must be taken to retrieve the derived + type component base in this case since semantics placed it in the + argument list and not in the evaluate::ProcedureDesignator. + + These components also bring a new level of recursive MLIR types since a + fir.type may now contain a component with an MLIR function type where + one of the arguments is the fir.type itself. This required moving the + "derived type in construction" stack to the converter so that the object + and function type lowering utilities share the same state (currently the + function type utility would end-up creating a new stack when lowering its + arguments, leading to infinite loops). The BoxedProcedurePass also + needed an update to deal with this recursive aspect. 
+ +commit 82e91b91ca0ceab5ee977295540643ce67153f89 +Author: Krzysztof Parzyszek +Date: Fri Dec 15 09:32:57 2023 -0600 + + [flang][OpenMP] Move handling of OpenMP symbol flags to OpenMP.cpp (#75523) + + The function `instantiateVariable` in Bridge.cpp has the following code: + ``` + if (var.getSymbol().test( + Fortran::semantics::Symbol::Flag::OmpThreadprivate)) + Fortran::lower::genThreadprivateOp(*this, var); + + if (var.getSymbol().test( + Fortran::semantics::Symbol::Flag::OmpDeclareTarget)) + Fortran::lower::genDeclareTargetIntGlobal(*this, var); + ``` + + Implement `handleOpenMPSymbolProperties` in OpenMP.cpp, move the above + code there, and have `instantiateVariable` call this function instead. + + This would further separate OpenMP-related details into OpenMP.cpp. + +commit aeb482106c03cb05025f904db69c65dbcfa745fe +Author: Krzysztof Parzyszek +Date: Fri Dec 15 09:01:08 2023 -0600 + + [flang][OpenMP] Move nested eval conversion to OpenMP.cpp, NFC (#75502) + + This is the first step towards exploiting `genEval` functionality from + inside of OpenMP-generating functions. + + This follows discourse discussion: + https://discourse.llvm.org/t/openmp-lowering-from-pft-to-fir/75263 + +commit fedc54bf35b378ab3418ba0f36c1df476aef5aca +Author: Valentin Clement (バレンタイン クレメン) +Date: Thu Dec 14 09:25:27 2023 -0800 + + [flang] Add genEval to the AbstractConverter (#75140) + + There was some discussion on discourse[1] about allowing call to FIR + generation functions from other part of lowering belonging to OpenMP. + + This solution exposes a simple `genEval` member function on the + `AbstractConverter` so that IR generation for PFT Evaluation objects can + be called from lowering outside of the FirConverter but not exposing it. 
+ + [1] https://discourse.llvm.org/t/openmp-lowering-from-pft-to-fir/75263 + +commit e59e848805f57bd52ebbb0f7f7d4d951e6af597c +Author: jeanPerier +Date: Wed Dec 6 14:20:06 2023 +0100 + + [flang] Updating drivers to create data layout before semantics (#73301) + + Preliminary patch to change lowering/code generation to use + llvm::DataLayout information instead of generating "sizeof" GEP (see + https://github.com/llvm/llvm-project/issues/71507). + + Fortran Semantic analysis needs to know about the target type size and + alignment to deal with common blocks, and intrinsics like + C_SIZEOF/TRANSFER. This information should be obtained from the + llvm::DataLayout so that it is consistent during the whole compilation + flow. + + This change is changing flang-new and bbc drivers to: + 1. Create the llvm::TargetMachine so that the data layout of the target + can be obtained before semantics. + 2. Sharing bbc/flang-new set-up of the + SemanticsContext.targetCharacteristics from the llvm::TargetMachine. For + now, the actual part that set-up the Fortran type size and alignment + from the llvm::DataLayout is left TODO so that this change is mostly an + NFC impacting the drivers. + 3. Let the lowering bridge set-up the mlir::Module datalayout attributes + since it is doing it for the target attribute, and that allows the llvm + data layout information to be available during lowering. + + For flang-new, the changes are code shuffling: the `llvm::TargetMachine` + instance is moved to `CompilerInvocation` class so that it can be used + to set-up the semantic contexts. `setMLIRDataLayout` is moved to + `flang/Optimizer/Support/DataLayout.h` (it will need to be used from + codegen pass for fir-opt target independent testing), and the code + setting-up semantics targetCharacteristics is moved to + `Tools/TargetSetup.h` so that it can be shared with bbc. 
+ + As a consequence, LLVM targets must be registered when running + semantics, and it is not possible to run semantics for a target that is + not registered with the -triple option (hence the power pc specific + modules can only be built if the PowerPC target is available). + +commit 3aba9264b38c1aa3a991065305c0a04988432692 +Author: vdonaldson <37090318+vdonaldson@users.noreply.github.com> +Date: Mon Dec 4 09:55:54 2023 -0800 + + [flang] IEEE_ARITHMETIC and IEEE_EXCEPTIONS intrinsic module procedures (#74138) + + Implement a selection of intrinsic module procedures that involve + exceptions. + + - IEEE_GET_FLAG + - IEEE_GET_HALTING_MODE + - IEEE_GET_MODES + - IEEE_GET_STATUS + - IEEE_LOGB + - [f23] IEEE_MAX, IEEE_MAX_MAG, IEEE_MAX_NUM, IEEE_MAX_NUM_MAG + - [f23] IEEE_MIN, IEEE_MIN_MAG, IEEE_MIN_NUM, IEEE_MIN_NUM_MAG + - IEEE_QUIET_EQ, IEEE_QUIET_GE, IEEE_QUIET_GT, + - IEEE_QUIET_LE, IEEE_QUIET_LT, IEEE_QUIET_NE + - IEEE_SET_FLAG + - IEEE_SET_HALTING_MODE + - IEEE_SET_MODES + - IEEE_SET_STATUS + - IEEE_SIGNALING_EQ, IEEE_SIGNALING_GE, IEEE_SIGNALING_GT, + - IEEE_SIGNALING_LE, IEEE_SIGNALING_LT, IEEE_SIGNALING_NE + - IEEE_SUPPORT_FLAG + - IEEE_SUPPORT_HALTING + +commit dd376f859526d9023c879e880f380158050daa5b +Author: Krzysztof Parzyszek +Date: Mon Dec 4 08:27:57 2023 -0600 + + [flang] Fix move-assign operator for struct IncrementLoopInfo (#74137) + +commit a9a5af82704d772509ccef87991384f47b65884d +Author: Valentin Clement (バレンタイン クレメン) +Date: Thu Nov 30 14:25:03 2023 -0800 + + [flang][openacc] Support early return in acc.loop (#73841) + + Early return is accepted in OpenACC loop not directly nested in a + compute construct. Since acc.loop operation has a region, the + `func.return` operation cannot be directly used inside the region. + An early return is materialized by an `acc.yield` operation returning a + `true` value. The standard end of the `acc.loop` region yield a `false` + value in this case. 
+ A conditional branch operation on the `acc.loop` result will branch to + the `finalBlock` or just to the continue block whether an early exit was + produce in the acc.loop. + +commit 0ccef6a723cbfe3d72b85d34963badb37a6a9a53 +Author: Mats Petersson +Date: Wed Nov 29 16:15:43 2023 +0000 + + [flang] Make adapt.valuebyref attribute work again (#73658) + + This got "lost" in the HLFIR transformation. This patch applies the old + attribute to the AssociateOp that needs it, and forwards it to the + AllocaOp that is generated when lowering to FIR. + +commit af09219edd87db860d1fc5a33dd49ecd31291699 +Author: Daniel Chen +Date: Wed Nov 22 11:51:12 2023 -0500 + + [Flang] Add partial support for lowering procedure pointer assignment. (#70461) + + **Scope of the PR:** + 1. Lowering global and local procedure pointer declaration statement + with explicit or implicit interface. The explicit interface can be from + an interface block, a module procedure or an internal procedure. + 2. Lowering procedure pointer assignment, where the target procedure + could be external, module or internal procedures. + 3. Lowering reference to procedure pointers so that it works end to end. + + **PR notes:** + 1. The first commit of the PR does not include testing. I would like to + collect some comments first, which may alter the output. Once I confirm + the implementation, I will add some testing as a follow up commit to + this PR. + 2. No special handling of the host-associated entities when an internal + procedure is the target of a procedure pointer assignment in this PR. + + **Implementation notes:** + 1. The implementation is using the HLFIR path. + 2. Flang currently uses `getUntypedBoxProcType` to get the + `fir::BoxProcType` for `ProcedureDesignator` when getting the address of + a procedure in order to pass it as an actual argument. This PR inherits + the same design decision for procedure pointer as the `fir::StoreOp` + requires the same memory type. 
+ + Note: this commit is actually resubmitting the original commit from + PR #70461 that was reverted. See PR #73221. + +commit 49f55d107548a340992eaec1b9767c0f8fc443cd +Author: Muhammad Omair Javaid +Date: Thu Nov 23 12:29:35 2023 +0500 + + Revert "[Flang] Add partial support for lowering procedure pointer assignment. (#70461)" + + This reverts commit e07fec10ac208c2868a24c5c0be88e45778b297e. + + This change appears to have broken following buildbots: + https://lab.llvm.org/buildbot/#/builders/176 + https://lab.llvm.org/buildbot/#/builders/179 + https://lab.llvm.org/buildbot/#/builders/184 + https://lab.llvm.org/buildbot/#/builders/197 + https://lab.llvm.org/buildbot/#/builders/198 + + All bots fails in testsuite where following tests seems broken: + (eg: https://lab.llvm.org/buildbot/#/builders/176/builds/7131) + + test-suite::gfortran-regression-compile-regression__proc_ptr_46_f90.test + test-suite::gfortran-regression-compile-regression__proc_ptr_37_f90.test + +commit e07fec10ac208c2868a24c5c0be88e45778b297e +Author: Daniel Chen +Date: Wed Nov 22 11:51:12 2023 -0500 + + [Flang] Add partial support for lowering procedure pointer assignment. (#70461) + + **Scope of the PR:** + 1. Lowering global and local procedure pointer declaration statement + with explicit or implicit interface. The explicit interface can be from + an interface block, a module procedure or an internal procedure. + 2. Lowering procedure pointer assignment, where the target procedure + could be external, module or internal procedures. + 3. Lowering reference to procedure pointers so that it works end to end. + + **PR notes:** + 1. The first commit of the PR does not include testing. I would like to + collect some comments first, which may alter the output. Once I confirm + the implementation, I will add some testing as a follow up commit to + this PR. + 2. No special handling of the host-associated entities when an internal + procedure is the target of a procedure pointer assignment in this PR. 
+ + **Implementation notes:** + 1. The implementation is using the HLFIR path. + 2. Flang currently uses `getUntypedBoxProcType` to get the + `fir::BoxProcType` for `ProcedureDesignator` when getting the address of + a procedure in order to pass it as an actual argument. This PR inherits + the same design decision for procedure pointer as the `fir::StoreOp` + requires the same memory type. + +commit a3700cc29da8fc48361256609bc0903ff94106c7 +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue Nov 14 14:42:11 2023 -0800 + + [flang][openacc] Make implicit declare region unstructured (#71591) + + Using an op with a region cause some issue with unstructured code. This + patch make use of acc.declare_enter and acc.declare_exit to represent + the implicit declare region. + +commit 1c91d9bdea3b6c38e8fbce46ec8181a9c0aa26f8 +Author: Peter Klausler <35819229+klausler@users.noreply.github.com> +Date: Mon Nov 13 16:13:50 2023 -0800 + + [flang] Ensure that portability warnings are conditional (#71857) + + Before emitting a warning message, code should check that the usage in + question should be diagnosed by calling ShouldWarn(). A fair number of + sites in the code do not, and can emit portability warnings + unconditionally, which can confuse a user that hasn't asked for them + (-pedantic) and isn't terribly concerned about portability *to* other + compilers. + + Add calls to ShouldWarn() or IsEnabled() around messages that need them, + and add -pedantic to tests that now require it to test their portability + messages, and add more expected message lines to those tests when + -pedantic causes other diagnostics to fire. 
+ +commit 91f92e6a6bfa4a1d963234ba70adb5a7957aeb1e +Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> +Date: Fri Nov 10 00:17:47 2023 -0800 + + [flang][OpenMP] Fix common block missing symbol crash (#67330) + + Fixes #65034 by skipping copy of host-association information if the + concerned symbol is missing from the inner construct + +commit d0ef94bc83019f7cb92a33b545782294ffdcfd04 +Author: Kiran Chandramohan +Date: Tue Nov 7 11:53:30 2023 +0000 + + Revert "Revert "[Flang][OpenMP] Fix to support privatisation of alloc strings (#71204)"" + + This reverts commit ba116ff41d525a4b6c931664f1b4437a7dd55b1d. + + This relands https://github.com/llvm/llvm-project/pull/71204 with a fix + in the test. + +commit ba116ff41d525a4b6c931664f1b4437a7dd55b1d +Author: Kiran Chandramohan +Date: Tue Nov 7 11:50:24 2023 +0000 + + Revert "[Flang][OpenMP] Fix to support privatisation of alloc strings (#71204)" + + This reverts commit 192bee0db1dfb8b2f570031102a2326037b75fa1. + Reverting to fix the CI. https://lab.llvm.org/buildbot/#/builders/21/builds/84995 + +commit 192bee0db1dfb8b2f570031102a2326037b75fa1 +Author: Kiran Chandramohan +Date: Tue Nov 7 11:40:25 2023 +0000 + + [Flang][OpenMP] Fix to support privatisation of alloc strings (#71204) + +commit 7046202c3dde093420c08e40116568e76a48ee59 +Author: jeanPerier +Date: Fri Oct 27 09:07:48 2023 +0200 + + [flang] Move whole allocatable assignment implicit conversion to lowering (#70317) + + The front-end is making implicit conversions explicit in assignment and + structure constructors. + + While this generally helps and is needed by semantics to fold structure + constructors correctly, this is incorrect when the LHS or component is + an allocatable. The RHS may have non default lower bounds that should be + propagated to the LHS, and making the conversion explicit changes the + semantics. 
In the structure constructor, the situation is even worse + since Fortran 2018 7.5.10 point 7 allows the value to be a reference to + an unallocated allocatable, and adding an explicit conversion in + semantics will cause a segfault. + + This patch removes the explicit convert in semantics when the + LHS/component is a whole allocatable, and update lowering to deal with + the conversion insertion, dealing with preserving the lower bounds and + the tricky structure constructor case. + +commit b6b0756ce5c4e2e07d7f6f1f430d3d29afe9a8a8 +Author: jeanPerier +Date: Wed Oct 25 09:22:23 2023 +0200 + + [flang] Allow lowering of sub-expressions to be overridden (#69944) + + OpenACC/OpenMP atomic lowering needs a finer control over expression + lowering. This patch allows mapping evaluate::Expr to mlir::Value so + that any subsequent expression lowering will use these values when an + operand is a mapped Expr. + + This is an alternative to + https://github.com/llvm/llvm-project/pull/69866 From which I took the + test and some of the logic to extract the non-atomic sub-expression. + + --------- + + Co-authored-by: Nimish Mishra + +commit 828674395b1997c01acd9c560646d909b9cc3615 +Author: Valentin Clement (バレンタイン クレメン) +Date: Tue Oct 24 09:17:48 2023 -0700 + + [flang][openacc] Allow acc routine at the top level (#69936) + + Some compilers allow the `$acc routine()` to be placed at the + program unit level. To be compatible, this patch enables the use of acc + routine at this level. These acc routine directives must have a name. + +commit 2ef370b7716b39390736e181d2eaabd740e1d59d +Author: jeanPerier +Date: Fri Oct 20 11:11:52 2023 +0200 + + [flang][openmp] Update copyHostAssociateVar to use hlfir.assign for HLFIR (#69441) + + The code in `copyHostAssociateVar` is using `createSomeArrayAssignment` + for arrays which is using the soon legacy expression lowering. Update + the copy to use hlfir.assign instead. 
+ + I used the temporary_lhs flag to mimic the current behavior, but maybe + user defined assignment should be called when needed .This flag also + prevents any finalizers to be called on the LHS if the LHS type has + finalizers (which would occur otherwise in normal intrinsic assignment). + Again, I am not sure what the OpenMP spec wants here. + + Also, I added special handling for ALLOCATABLE, the current code seems + broken to me since it is basically copying the descriptor which would + lead to memory leak given the TEMP was previously allocated with the + shape of the variable in createHostAssociateVarClone. So copying the + DATA instead seemed like the right thing to do. + +commit bfcd05317d0fbe90474eda13a4dbf33c2cee4130 +Author: jeanPerier +Date: Tue Oct 17 09:11:53 2023 +0200 + + [flang][hlfir] Do not emit extra declare for dummy used in BLOCK (#69184) + + When a variable is used in a specification expression in a scope, it is + added to the list of variables that must be instantiated when lowering + the scope. When lowering a BLOCK, this caused instantiateVar to be + called again on all the host block variables appearing in block variable + specification expressions. This caused an extra declare to be emitted + for dummy inside block (for non dummy, instantiateVar is a no-op if the + symbol is already mapped). + + Only call instantiateVar if the symbol is not mapped when lowering BLOCK + variables. + +commit 5db4779c3f07b6f562339722c176fb58329652ac +Author: Pete Steinfeld <47540744+psteinfeld@users.noreply.github.com> +Date: Mon Oct 16 12:37:57 2023 -0700 + + [flang] Regularize TODO messages for coarray related features (#69227) + + I want to make "not yet implemented" messages for features related to + coarrays easy to identify and make them easy for users to read. 
+ +commit 4ccd57ddb11e833f6b2ec2188e73c4ef3a5ab80e +Author: jeanPerier +Date: Fri Oct 6 09:29:57 2023 +0200 + + [flang][nfc] replace fir.dispatch_table with more generic fir.type_info (#68309) + + The goal is to progressively propagate all the derived type info that is + currently in the runtime type info globals into a FIR operation that can + be easily queried and used by FIR/HLFIR passes. + + When this will be complete, the last step will be to stop generating the + runtime info global in lowering, but to do that later in or just before + codegen to keep the FIR files readable (on the added type-info.f90 + tests, the lowered runtime info globals takes a whooping 2.6 millions + characters on 1600 lines of the FIR textual output. The fir.type_info that + contains all the info required to generate those globals for such + "trivial" types takes 1721 characters on 9 lines). + + So far this patch simply starts by replacing the fir.dispatch_table + operation by the fir.type_info operation and to add the noinit/ + nofinal/nodestroy flags to it. These flags will soon be used in HLFIR to + better rewrite hlfir.assign with derived types. + +commit 43d2ef2856fc3373068c020efa11a933477e11fa +Author: jeanPerier +Date: Tue Sep 26 20:33:01 2023 +0200 + + [flang][lowering] propagate location info of macro expansions (#67446) + + Currently flang-new -g is failing when compiling code containing a call + in a macro to a function defined in the same file. + + The verification added in https://reviews.llvm.org/D157447 is valid, + flang lowering was failing to propagate location information in code + from macro expansion because GetSourcePositionRange does not work with + them (it fails to come with an end location), but we do not need a range + for the MLIR location, only the start. + + Use GetSourcePosition instead that works with code from macro expansion. 
+ + Note that the source location is the one of the statement where the + macro appeared, if needed some FusedLocation could be later built to + keep a link to the macro location in the debug info. + +commit 2cb31fe8ea7bbe3c4fce0f03f8126341a353d01b +Author: jeanPerier +Date: Thu Sep 21 18:38:23 2023 +0200 + + [flang] Centralize automatic deallocation code in lowering (#67003) + + There are currently several places that automatically deallocate + allocatble if they are allocated: + - INTENT(OUT) allocatable are deallocated on entry in the callee + - INTENT(OUT) allocatable are also deallocated on the caller side of + BIND(C) function in case the implementation is in C. + - Results of function returning allocatable are deallocated after usage. + - OPENMP privatized allocatable are deallocated at the end of OPENMP + region. + + Introduce genDeallocateIfAllocated that centralize all this code, except + for the function return that use genFreememIfAllocated since + finalization is done separately currently. + + `fir::factory::genFinalization` and + `fir::factory::genInlinedDeallocation` are removed and replaced by + genFreemem since their name were misleading: finalization was not + called. + + There is a fallout in the tests because previous generated code did not + check the allocated status when doing inline deallocation. This was OK + since free(null) is guaranteed to be a no-op, but this makes compiler + code more complex, is a bit surprising in the generated IR IMHO, and it + relied on knowing when genDeallocateBox inserts runtime calls or uses + inlined code. + +commit 3dbb055f54e705d125e1fd30db463e7aff8bbeff +Author: Leandro Lupori +Date: Thu Sep 21 15:59:35 2023 +0200 + + [flang] Generate valid IR on GOTO DO body (#66084) + + Flang was generating invalid IR when there was a GOTO to the body + of a DO loop. 
This happened because the value of step, computed at + the beginning of the loop, was being reused at the end of the loop, + that, for unstructured loops, is in another basic block. Because of + this, a GOTO could skip the beginning of the loop, that defined + step, and yet try to use it at the end of the loop, which is + invalid. + + Instead of reusing the step value, it can be recomputed if it is a + constant, or stored and loaded to/from a temporary variable, for + non-constant step expressions. + + Note that, while this change prevents the generation of invalid IR + on the presence of jumps to DO loop bodies, what happens if the + program reaches the end of a DO loop without ever passing through + its beginning is undefined behavior, as some control variables, + such as trip, will be uninitialized. It doesn't seem worth the + effort and overhead to ensure this legacy extension will behave + correctly in this case. This is consistent with at least gfortran, + that doesn't behave correctly if step is not equal to one. + + Fixes: https://github.com/llvm/llvm-project/issues/65036 + +commit 8fde6f41a0e5a2b280e46521ed2236fab5c03412 +Author: Andrew Gozillon +Date: Tue Sep 19 08:00:40 2023 -0500 + + [Flang][OpenMP] Add lowering from PFT to new MapEntry and Bounds operations and tie them to relevant Target operations + + This patch builds on top of a prior patch in review which adds a new map + and bounds operation by modifying the OpenMP PFT lowering to support + these operations and generate them from the PFT. + + A significant amount of the support for the Bounds operation is borrowed + from OpenACC's own current implementation and lowering, just ported + over to OpenMP. + + The patch also adds very preliminary/initial support for lowering to + a new Capture attribute, which is stored on the new Map Operation, + which helps the later lowering from OpenMP -> LLVM IR by indicating + how a map argument should be handled. 
This capture type will + influence how a map argument is accessed on device and passed by + the host (different load/store handling etc.). It is reflective of a + similar piece of information stored in the Clang AST which performs a + similar role. + + As well as some minor adjustments to how the map type (map bitshift + which dictates to the runtime how it should handle an argument) is + generated to further support more use-cases for future patches that + build on this work. + + Finally it adds the map entry operation creation and tying it to the relevant + target operations as well as the addition of some new tests and alteration + of previous tests to support the new changes. + + Depends on D158732 + + reviewers: kiranchandramohan, TIFitis, clementval, razvanlupusoru + + Differential Revision: https://reviews.llvm.org/D158734 + +commit 47025af6399aa29a045275349b04aaffaa918d1b +Author: Slava Zakharin +Date: Mon Sep 18 09:59:06 2023 -0700 + + [flang][hlfir] Alias analysis for host associated accesses. (#65919) + + This patch adds `host_assoc` attribute for operations that implement + FortranVariableInterface (e.g. `hlfir.declare`). The attribute is used + by the alias analysis to make better conclusions about memory overlap. + For example, a dummy argument of an inner subroutine and a host's + variable used inside the inner subroutine cannot refer to the same + object (if the dummy argument does not satisify exceptions in F2018 + 15.5.2.13). + This closes a performance gap between HLFIR optimization pipeline + and FIR ArrayValueCopy for Polyhedron/nf. + +commit 99a54b839a441a6e9dd9106c5fd9e547cf1309e5 +Author: jeanPerier +Date: Mon Sep 18 14:59:56 2023 +0200 + + [flang] Lower PRIVATE component names safely (#66076) + + It is possible for a derived type extending a type with private + components to define components with the same name as the private + components. 
+ + This was not properly handled by lowering where several fir.record type + component names could end-up being the same, leading to bad generated + code (only the first component was accessed via fir.field_index, leading + to bad generated code). + + This patch handles the situation by adding the derived type mangled name + to private component. + +commit 29aa749087be38d3e5a3a37e0b8e8ab74e9f79aa +Author: Sergio Afonso +Date: Wed Mar 29 18:13:48 2023 +0100 + + [OpenMP][Flang][MLIR] Lowering of OpenMP requires directive from parse tree to MLIR + + This patch implements the lowering of the OpenMP 'requires' directive + from Flang parse tree to MLIR attributes attached to the top-level + module. + + Target-related 'requires' clauses are gathered and combined for each top-level + unit during semantics. Lastly, a single module-level `omp.requires` attribute + is attached to the MLIR module with that information at the end of the process. + + The `atomic_default_mem_order` clause is not addressed by this patch, but + rather it will come as a separate patch and follow a different approach. + + Depends on D147214, D150328, D150329 and D157983. + + Differential Revision: https://reviews.llvm.org/D147218 + +commit e070ea47a991d2b4a135f6bfb761b19013d7f6af +Author: Razvan Lupusoru +Date: Mon Sep 11 13:58:10 2023 -0700 + + [flang][openacc] Enable lowering support for OpenACC atomic operations (#65776) + + Since the OpenACC atomics specification is a subset of OpenMP atomics, + the same lowering implementation can be used. This change extracts out + the necessary pieces from the OpenMP lowering and puts them in a shared + spot. The shared spot is a header file so that each implementation can + template specialize directly. + + After putting the OpenMP implementation in a common spot, the following + changes were needed to make it work for OpenACC: + * Ensure parsing works correctly by avoiding hardcoded offsets. + * Templatize based on atomic type. 
+ * The checking whether it is OpenMP or OpenACC is done by checking for + OmpAtomicClauseList (OpenACC does not implement this so we just + templatize with void). It was preferable to check this instead of atomic + type because in some cases, like atomic capture, the read/write/update + implementations are called - and we want compile time evaluation of + these conditional parts. + * The memory order and hint are used only for OpenMP. + * Generate acc dialect operations instead of omp dialect operations. + +commit 6ffea74f7c2cda7de91879a771daa6d45da198d9 +Author: jeanPerier +Date: Fri Sep 8 10:43:55 2023 +0200 + + [flang] Use BIND name, if any, when consolidating common blocks (#65613) + + This patch changes how common blocks are aggregated and named in + lowering in order to: + + * fix one obvious issue where BIND(C) and non BIND(C) with the same + Fortran name were "merged" + + * go further and deal with a derivative where the BIND(C) C name matches + the assembly name of a Fortran common block. This is a bit unspecified + IMHO, but gfortran, ifort, and nvfortran "merge" the common block + without complaints as a linker would have done. This required getting + rid of all the common block mangling early in FIR (\_QC) instead of + leaving that to the phase that emits LLVM from FIR because BIND(C) + common blocks did not have mangled names. Care has to be taken to deal + with the underscoring option of flang-new. + + See added flang/test/Lower/HLFIR/common-block-bindc-conflicts.f90 for an + illustration. + +commit 20f4a5a313e58b15bdbf74c4773931c6baa96884 +Author: Valentin Clement (バレンタイン クレメン) +Date: Thu Sep 7 14:54:38 2023 -0700 + + [flang][openacc][NFC] Clean up lowering api (#65678) + + Remove unused argument `pft::Evaluation` from higher level lowering API. + +commit f8843efbb2190db85c696001ffd6211a2c20ac37 +Author: Slava Zakharin +Date: Thu Sep 7 11:41:22 2023 -0700 + + [flang][hlfir] Lower Cray pointee references. 
(#65563) + + A Cray pointee reference must be done using the characteristics + (bounds, type params) of the original pointee declaration, but + using the actual address value of the associated Cray pointer. + There might be multiple Cray pointees associated with the same + Cray pointer. + + The proposed solution is to lower each Cray pointee into a POINTER + variable with a descriptor. The descriptor is initialized at the point + of declaration of the pointee, though its base_addr is set to null. + Before each reference of the Cray pointee its descriptor's base_addr + is updated to the current value of the Cray pointer. + + The update of the base_addr is done using PointerAssociateScalar + runtime call, which just updates the base_addr of the descriptor. + This is a temporary solution just to make Cray pointers work + to the same extent they work with FIR lowering. + +commit d26c78b2ad5ed0f3384d7a3ef4b4d894f2b1be3e +Author: jeanPerier +Date: Wed Sep 6 09:07:45 2023 +0200 + + [flang] handle indirect module variable use in internal procedure (#65324) + + When a module variable is referenced inside an internal procedure, but + the use statement for the module is inside the host, semantics may not + create any symbols with HostAssocDetails directly under the internal + procedure scope. + So pft::getScopeVariableList, that is called in the bridge when lowering + the internal procedure scope, failed to instantiate the module + variables. This lead to "symbol is not mapped to any IR value" compile + time errors. + + This patch fixes the issue by adding the variables to the list of + "captured" global variables from the host program, so that they are + instantiated as part of the `internalProcedureBindings` in the bridge. 
The rationale for doing it that way instead of changing
+ + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D156187 + +commit 90f58eb37b30cc2f5222053dc6e7e0a187819431 +Author: Kiran Chandramohan +Date: Fri Sep 1 10:44:35 2023 +0000 + + [Flang][OpenMP] Fix loop index privatisation in Parallel region in HLFIR + + HLFIR lowering always adds hlfir.declare when symbols are bound to their + address allocated on the stack. Ensure that the declare is placed along + with the alloca if it is hoisted. And always return the mlir value that + is bound to the symbol (i.e the alloca in FIR lowering and the declare + in HLFIR lowering). + + Context: Loop index variables in OpenMP parallel regions should be + privatised to work correctly. + + Reviewed By: tblah + + Differential Revision: https://reviews.llvm.org/D158594 + +commit 031b4e5e795a72e23c69da3d06ae7a958d217a8e +Author: Peter Klausler +Date: Mon Aug 21 12:21:49 2023 -0700 + + [flang] Support SELECT RANK on allocatables & pointers + + Unlike other executable constructs with associating selectors, the + selector of a SELECT RANK construct can have the ALLOCATABLE or POINTER + attribute, and will work as an allocatable or object pointer within + each rank case, so long as there is no RANK(*) case. + + Getting this right exposed a correctness risk with the popular + predicate IsAllocatableOrPointer() -- it will be true for procedure + pointers as well as object pointers, and in many contexts, a procedure + pointer should not be acceptable. So this patch adds the new predicate + IsAllocatableOrObjectPointer(), and updates some call sites of the original + function to use the new one. 
+ + Differential Revision: https://reviews.llvm.org/D159043 + +commit a678ed41d24983bd1fb78b98fd790e3381979d44 +Author: Kazu Hirata +Date: Sun Aug 27 08:26:48 2023 -0700 + + [flang] Use DenseMap::lookup (NFC) + +commit 8b834caa62a279a0b4136bf3c8950b4f7162308e +Author: Kiran Chandramohan +Date: Wed Aug 23 11:37:00 2023 +0000 + + [Flang][OpenMP] Fix HLFIR lowering for commonblock threadprivate + + Commonblock names are not variables, but they can be marked as + threadprivate in OpenMP. This requires the commonblock name to + be bound to the address of the Commonblock. hlfir.declares are + not required for these, but we should be able to retrieve the + mlir Value corresponding to the Commonblock. This patch enables + this by special casing the Commonblocks like procedures. + + Reviewed By: tblah, vzakhari + + Differential Revision: https://reviews.llvm.org/D158070 + +commit a1c736ec08f25e83552b20c94a5b2afdcd021a40 +Author: Mark Danial +Date: Tue Aug 22 12:10:08 2023 -0400 + + [Flang] Cray pointer Lowering + + This patch is to add cray pointer (aka integer pointer) support to flang. Syntax and semantic checking were already available in flang. + Cray pointers reference (https://gcc.gnu.org/onlinedocs/gfortran/Cray-pointers.html) + + In order to implement the feature we create the following sequence for a simple scalar load and store: + + ``` + integer pte, i + pointer(ptr, pte) + i = pte + ``` + + ``` + %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"} + %2 = fir.alloca i32 {bindc_name = "pte", uniq_name = "_QFEpte"} + %3 = fir.alloca i64 {bindc_name = "ptr", uniq_name = "_QFEptr"} + ... 
+ %7 = fir.embox %3 : (!fir.ref) -> !fir.box + %8 = fir.box_addr %7 : (!fir.box) -> !fir.ref + %9 = fir.convert %8 : (!fir.ref) -> !fir.ref> + %10 = fir.load %9 : !fir.ref> + %11 = fir.load %10 : !fir.ptr + fir.store %11 to %1 : !fir.ref + ``` + + ``` + integer pte, i + pointer(ptr, pte) + pte = i + ``` + + ``` + %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"} + %2 = fir.alloca i32 {bindc_name = "pte", uniq_name = "_QFEpte"} + %3 = fir.alloca i64 {bindc_name = "ptr", uniq_name = "_QFEptr"} + + %7 = fir.load %1 : !fir.ref + %8 = fir.embox %3 : (!fir.ref) -> !fir.box + %9 = fir.box_addr %8 : (!fir.box) -> !fir.ref + %10 = fir.convert %9 : (!fir.ref) -> !fir.ref> + %11 = fir.load %10 : !fir.ref> + fir.store %7 to %11 : !fir.ptr + ``` + The sequence is very similar for array element cases with the addition of fir.coordinate_of for the specific element. + The whole array case is slightly different but uses the same sequence before the fir.array_load and fir.array_merge_store. + + Reviewed By: kkwli0 + + Differential Revision: https://reviews.llvm.org/D151478 + +commit 4d04baeca5d8ce0f098e4d19317c26c30e773747 +Author: Valentin Clement +Date: Mon Aug 21 12:38:18 2023 -0700 + + [flang][openacc] Lower acc declare to the new acc.declare function + + Lower the acc delcare directive in function/subroutine + to the newly introduced acc.declare operation. Only a single + acc.declare operation is procduced in a function or subroutine + so they don't end up nested. + + Depends on D158314 + + Reviewed By: razvanlupusoru + + Differential Revision: https://reviews.llvm.org/D158315 + +commit 69a6bd5f052f076c72ef6f2a39a13f725acdb13a +Author: Valentin Clement +Date: Thu Aug 17 14:25:05 2023 -0700 + + [flang][openacc] Lower acc routine with function name + + The routine directive can appear in the specification part of + a subroutine, function or module and therefore appear before the + function or subroutine is lowered. 
We keep track of the created + routine info attribute and attach them to the function at the end + of the lowering if the directive appeared before the function was + lowered. + + Reviewed By: razvanlupusoru + + Differential Revision: https://reviews.llvm.org/D158204 + +commit 335b3990ef9115e3b20eb9dfa32393a7fdfde4e3 +Author: V Donaldson +Date: Mon Aug 7 13:29:17 2023 -0700 + + [flang] Do concurrent locality specifiers + +commit 14741ef88f2a00f4b5f92b981e1aec04bfa08d36 +Author: Valentin Clement +Date: Tue Aug 1 14:10:16 2023 -0700 + + [flang][openacc] Lower the exit part for OpenACC declare in function/subroutine + + This patch adds lowering for the exit part of the OpenACC declare construct + in function/subroutine. + + Depends on D156560 + + Reviewed By: razvanlupusoru + + Differential Revision: https://reviews.llvm.org/D156568 + +commit f752265231c2d15590a53e45bcc850acf2450dfc +Author: Nimish Mishra +Date: Mon Jul 31 16:34:30 2023 +0530 + + [flang][OpenMP] Support for privatization in common block + + This patch provides support for usage of common block + in private/firstprivate and lastprivate clauses. + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D156120 + +commit b4c54b20270ea1f0eac574785156c668930da5c5 +Author: Peixin Qiao +Date: Mon Jul 31 15:59:20 2023 +0530 + + [flang][OpenMP] Support common block in OpenMP private clause + + This supports the common block in OpenMP privat clause by making + each common block member host-associated privatization and + adds the test case. + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D127215 + +commit c217ff8794c2a710ef772ace1119ee773a182e9a +Author: Valentin Clement +Date: Wed Jul 26 09:55:57 2023 -0700 + + [flang][openacc] Add basic lowering for OpenACC declare construct in module + + This patch adds the skeleton and the basic lowering for OpenACC declare + construct when located in the module declaration. 
This patch just lowers the + create clause with or without modifier. Other clauses and global destructor + lowering will come in follow up patches to keep this one small enough for + review.
+ + Reviewed By: kiranchandramohan, domada + + Differential Revision: https://reviews.llvm.org/D154879 + +commit bc4586da6ef349b2777f28c0cd9b8b0f8faba125 +Author: Dmitriy Smirnov +Date: Mon Jul 3 16:31:20 2023 +0000 + + [Flang][OpenMP] Lower allocatable or pointer in private clause + + This patch lowers allocatables and pointers named in "private" OpenMP clause. + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D148570 + +commit 9bf50936237ded3fc324f4686dc4c1c5a9fb16eb +Author: Ethan Luis McDonough +Date: Fri Jun 30 15:32:21 2023 -0500 + + [flang][openmp] Parallel reduction FIR lowering + + This patch extends the logic for lowering loop construct reductions to parallel block reductions. + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D154182 + +commit 09ea692d166af42cda43bd24d42a6c67a12cce5a +Author: V Donaldson +Date: Thu Jun 29 11:32:56 2023 -0700 + + [flang] IEEE_ARITHMETIC intrinsic module procedures + + Implement + + - IEEE_CLASS + - IEEE_COPY_SIGN + - IEEE_GET_ROUNDING_MODE + - IEEE_IS_FINITE + - IEEE_IS_NAN + - IEEE_IS_NEGATIVE + - IEEE_IS_NORMAL + - IEEE_SET_ROUNDING_MODE + - IEEE_SIGNBIT + - IEEE_SUPPORT_ROUNDING + - IEEE_UNORDERED + - IEEE_VALUE + + for all REAL kinds (2, 3, 4, 8, 10, 16) where applicable. + +commit 7b4aa95d7c5e313ffb8028f627fe0480d66ef650 +Author: Slava Zakharin +Date: Thu Jun 29 10:39:52 2023 -0700 + + [flang][hlfir] Set/propagate 'unordered' attribute for elementals. + + This patch adds 'unordered' attribute handling the HLFIR elementals' + builders and fixes the attribute handling in lowering and transformations. 
+ + Depends on D154031, D154032 + + Reviewed By: jeanPerier, tblah + + Differential Revision: https://reviews.llvm.org/D154035 + +commit e12ffe6a93505e590158ddd8cc73a4f201bbf0aa +Author: Peter Klausler +Date: Fri Jun 23 11:01:33 2023 -0700 + + [flang] Honor #line and related preprocessing directives + + Extend the SourceFile class to take account of #line directives + when computing source file positions for error messages. + Adjust the output of #line directives to -E output so that they + reflect any #line directives that were in the input. + + Differential Revision: https://reviews.llvm.org/D153910 + +commit 23fbe525ce0645341610b751184882fea264c99e +Author: Jean Perier +Date: Wed Jun 28 08:27:16 2023 +0200 + + [flang] do not merge block after lowering + + Lowering relies on dead code generation / unreachable block deletion + to delete some code that is potentially invalid. + + However, calling mlir::simplifyRegion also merges block, which may + promote SSA values to block arguments. Not all FIR types are intended + to be block arguments. + The added test shows an example where block merging led to + fir.shape<> being block arguments (and a failure later in codegen). + + Reviewed By: tblah, clementval, vdonaldson + + Differential Revision: https://reviews.llvm.org/D153858 + +commit 67169233322397e01cfdcdbf8131d77d38a41be0 +Author: Jean Perier +Date: Mon Jun 26 13:06:43 2023 +0200 + + [flang][hlfir] Lower user defined assignment + + Lower user defined assignment inside the hlfir.region_assign + "userDefinedAssignment" mlir region. + + This is done by adding an entry point to ConvertCall.h in order + to call genUserCall with the region block arguments as arguments. + + The codegen for hlfir.region_assign with user defined assignment + will be added in a later patch. 
+ + Differential Revision: https://reviews.llvm.org/D153404 + +commit 569716fc5c2c232adcd5ff840637be596c1de9b9 +Author: Tom Eccles +Date: Wed Jun 14 13:23:00 2023 +0000 + + [flang][hlfir] Fix multiple return declaration type + + When the ENTRY statement is used, the same source can return different + types depending on the entry point. These different return values are + storage associated (share the same storage). Previously, this led to the + declaration of the results to all have the largest type. This patch adds + a convert between the stack allocation and the declaration so that the + hlfir.decl gets the right type. + + I haven't managed to generate code where this convert converted a + reference to an allocation for a smaller type into an allocation for a + larger one, but I have added an assert just in case. + + This is a different solution to https://reviews.llvm.org/D152725, see + discussion there. + + Differential Revision: https://reviews.llvm.org/D152931 + +commit 6e3a8720474528f8f752d0afbc6b8b9efab96325 +Author: Dhruv Chawla <44582521+dc03@users.noreply.github.com> +Date: Wed Jun 7 12:46:52 2023 +0530 + + [SetVector] Improve performance for small sizes + + SmallSetVector has an inefficiency where it does set insertions + regardless of the number of elements present within it. This contrasts + with other "Small-" containers where they use linear scan up to a + certain size "N", after which they switch to another strategy. + + This patch implements this functionality in SetVector, adding a template + parameter "N" which specifies the number of elements upto which the + SetVector follows the "small" strategy. Due to the use of "if + constexpr", there is no "small" code emitted when N is 0 which makes + this a zero overhead change for users using the default behaviour. + + This change also allows having SmallSetVector use DenseSet instead of + SmallDenseSet by default, which helps a little with performance. 
+ + The reason for implementing this functionality in SetVector instead of + SmallSetVector is that it allows reusing all the code that is already + there and it is just augmented with the "isSmall" checks. + + This change gives a good speedup (0.4%): + https://llvm-compile-time-tracker.com/compare.php?from=086601eac266ec253bf313c746390ff3e5656132&to=acd0a72a4d3ee840f7b455d1b35d82b11ffdb3c0&stat=instructions%3Au + + Differential Revision: https://reviews.llvm.org/D152497 + +commit ca81808cc31074198663eb4beb904f490598d14f +Author: Kiran Chandramohan +Date: Mon Jun 5 15:43:37 2023 +0000 + + [Flang][OpenMP] Refactor to properly fix privatisation of loop bounds + + The OpenMP loop Operations have the bounds attached to them. If the + loop bounds are privatised then the privatisation has to happen + before the loop operation is created. To do this the privatisation + is split into two steps. The first step performs cloning and + firstprivate handling, the second step performs lastprivate handling. + + This also reverts the changes in the temporary fix (D127137). + + Fixes https://github.com/flang-compiler/f18-llvm-project/issues/1171#issuecomment-1143880545 + Fixes https://github.com/flang-compiler/f18-llvm-project/issues/1171#issuecomment-1119997442 + + Fixes #60872 + + Reviewed By: NimishMishra + + Differential Revision: https://reviews.llvm.org/D151504 + +commit 4ad7279392653c0bcf564799ffb3f7e20ed4ef00 +Author: Peter Klausler +Date: Sat May 6 15:03:39 2023 -0700 + + [flang] CUDA Fortran - part 1/5: parsing + + Begin upstreaming of CUDA Fortran support in LLVM Flang. 
+ + This first patch implements parsing for CUDA Fortran syntax, + including: + - a new LanguageFeature enum value for CUDA Fortran + - driver change to enable that feature for *.cuf and *.CUF source files + - parse tree representation of CUDA Fortran syntax + - dumping and unparsing of the parse tree + - the actual parsers for CUDA Fortran syntax + - prescanning support for !@CUF and !$CUF + - basic sanity testing via unparsing and parse tree dumps + + ... along with any minimized changes elsewhere to make these + work, mostly no-op cases in common::visitors instances in + semantics and lowering to allow them to compile in the face + of new types in variant<> instances in the parse tree. + + Because CUDA Fortran allows the kernel launch chevron syntax + ("call foo<<>>()") only on CALL statements and + not on function references, the parse tree nodes for CallStmt, + FunctionReference, and their shared Call were rearranged a bit; + this caused a fair amount of one-line changes in many files. + + More patches will follow that implement CUDA Fortran in the symbol + table and name resolution, and then semantic checking. + + Differential Revision: https://reviews.llvm.org/D150159 + +commit 9ceb0a7bc0e73c4aab6cfade225f3ab33c949b83 +Author: Carlos Eduardo Seo +Date: Sat May 20 05:16:50 2023 +0000 + + Fix nested block constructs for SELECT CASE + + In some scenarios, a SELECT CASE could cause an error while lowering to FIR. + This was caused by a spurious extra branch added after the end statement. + + Fixes #62726 + + Differential Revision: https://reviews.llvm.org/D151118 + +commit ef934174704b75c8e04830bfd4f0c0bbedde9621 +Author: Kelvin Li +Date: Tue May 23 19:02:49 2023 -0400 + + [flang] Support for PowerPC vector type + + The following PowerPC vector type syntax is added: + + VECTOR ( element-type-spec ) + + where element-type-sec is integer-type-spec, real-type-sec or unsigned-type-spec. + + Two opaque types (__VECTOR_PAIR and __VECTOR_QUAD) are also added. 
+ + A finite set of functionalities are implemented in order to support the new types: + 1. declare objects + 2. declare function result + 3. declare type dummy arguments + 4. intrinsic assignment between the new type objects (e.g. v1=v2) + 5. reference functions that return the new types + + Submit on behalf of @tislam @danielcchen + + Authors: @tislam @danielcchen + + Differential Revision: https://reviews.llvm.org/D150876 + +commit 6f7a3b078191a925546ea3fead2e9cf0efdd9257 +Author: V Donaldson +Date: Tue May 16 13:34:57 2023 -0700 + + [flang] Non-type-bound defined IO lowering + + Generate supporting data structures and calls to new runtime IO functions + for defined IO that accesses non-type-bound procedures, such as `wft` in: + + module m1 + type t + integer n + end type + interface write(formatted) + module procedure wft + end interface + contains + subroutine wft(dtv, unit, iotype, v_list, iostat, iomsg) + class(t), intent(in) :: dtv + integer, intent(in) :: unit + character(*), intent(in) :: iotype + integer, intent(in) :: v_list(:) + integer, intent(out) :: iostat + character(*), intent(inout) :: iomsg + iostat = 0 + write(unit,*,iostat=iostat,iomsg=iomsg) 'wft was called: ', dtv%n + end subroutine + end module + + module m2 + contains + subroutine test1 + use m1 + print *, 'test1, should call wft: ', t(1) + end subroutine + subroutine test2 + use m1, only: t + print *, 'test2, should not call wft: ', t(2) + end subroutine + end module + + use m1 + use m2 + call test1 + call test2 + print *, 'main, should call wft: ', t(3) + end + +commit 4eab303404d6bb2252b4baf807c5ac87a0fa3125 +Author: Slava Zakharin +Date: Tue May 16 20:05:22 2023 -0700 + + [flang][hlfir] Fixed symbol lookup for character returns. + + Symbols corresponding to entries returning character results + must be mapped to EmboxCharOp, first, before we can map them + to DeclareOp. 
The code may be reworked after HLFIR is enabled + by default, but right now it seems like an acceptable solution to me. + + Differential Revision: https://reviews.llvm.org/D150749 + +commit 7f7bbc73175d94f63cba905191a4ecc341b9fdba +Author: Peter Klausler +Date: Tue May 16 12:33:29 2023 -0700 + + [flang] Correct overriding (or not) of inaccessible bindings + + Fortran doesn't allow inaccessible procedure bindings to be + overridden, and this needs to apply to generic resolution. + When resolving a type-bound generic procedure from another + module, ensure only that the most extended override from its + module is used if it is PRIVATE, not a later apparent override + from another module. + + Differential Revision: https://reviews.llvm.org/D150721 + +commit be5747e516937df6436c9abb8059b6e471c02226 +Author: Slava Zakharin +Date: Tue May 9 19:50:48 2023 -0700 + + [flang] Fixed global name creation for literal constants. + + The global names were created using a hash based on the address + returned by std::vector::data. Since the memory may be reused + by different std::vector's, this may cause non-equivalent + constant expressions to map to the same name. This is what is happening + in the modified flang/test/Lower/constant-literal-mangling.f90 test. + + I changed the name creation to use a map between the constant expressions + and corresponding unique names. The uniquing is done using a name counter + in FirConverter. The effect of this change is that the equivalent + constant expressions are now mapped to the same global, and the naming + is "stable" (i.e. it does not change from compilation to compilation). + + Though the issue is not HLFIR specific, it was affecting several tests + when using HLFIR lowering.
+ + Differential Revision: https://reviews.llvm.org/D150380 + +commit c7ff45a529ca16c8a3dbff3b6786e41b49c195cc +Author: Jean Perier +Date: Tue May 9 09:22:24 2023 +0200 + + [flang][hlfir] Lower left-hand side vector subscripts to HLFIR + + This patch lowers assignments to vector subscripted designators into the + newly added hlfir.elemental_addr and hlfir.region_assign. + + Note that the codegen of these operations to FIR is still TODO and will + still emit a TODO message when trying to compile programs end to end. + + Differential Revision: https://reviews.llvm.org/D149962 + +commit 54c88fc9dfa5854a5891cf3d68d3d2c4a4ba0f25 +Author: Jean Perier +Date: Tue May 9 09:21:09 2023 +0200 + + [flang][hlfir] Lower WHERE to HLFIR + + Lower WHERE to the newly added hlfir.where and hlfir.elsewhere + operations. + + Differential Revision: https://reviews.llvm.org/D149950 + +commit b87e65531c58df55cfae4c06c7a68f84539aa779 +Author: Jean Perier +Date: Tue May 9 09:18:53 2023 +0200 + + [flang][hlfir] Lower forall to HLFIR + + Lower Forall to the previously added hlfir.forall, hlfir.forall_mask, + hlfir.forall_index, and hlfir.region_assign operations. + + The HLFIR assignment code lowering is moved into genDataAssignment for + more readability and so that user defined assignment (still a TODO) + will be able to share most of the logic. + + Differential Revision: https://reviews.llvm.org/D149878 + +commit 42df495114dc75fac4f75babe8f1ed43b15515fc +Author: Ethan Luis McDonough +Date: Fri May 5 15:50:18 2023 -0500 + + [flang] OpenMP allocate directive parse tree fix + + Addresses the same issue as the following abandoned revision: D104391.
+ + Rewrite leading declarative allocations so they are nested within their respective executable allocate directive + + Original: + ExecutionPartConstruct -> OpenMPDeclarativeAllocate + ExecutionPartConstruct -> OpenMPDeclarativeAllocate + ExecutionPartConstruct -> OpenMPExecutableAllocate + + After rewriting: + ExecutionPartConstruct -> OpenMPExecutableAllocate + | ExecutionPartConstruct -> OpenMPDeclarativeAllocate + | ExecutionPartConstruct -> OpenMPDeclarativeAllocate + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D148409 + + Co-authored-by: Isaac Perry + +commit a6e616cdb1bb89f19a9df5e72b1e4256fed19968 +Author: Ethan Luis McDonough +Date: Fri May 5 15:47:00 2023 -0500 + + Revert "[flang] OpenMP allocate directive parse tree fix" + + This reverts commit 597d8563cd66f23d857196bf135a0c513115ece2. + +commit 597d8563cd66f23d857196bf135a0c513115ece2 +Author: Ethan Luis McDonough +Date: Fri May 5 14:53:08 2023 -0500 + + [flang] OpenMP allocate directive parse tree fix + + Addresses the same issue as the following abandoned revision: D104391. + + Rewrite leading declarative allocations so they are nested within their respective executable allocate directive + + Original: + ``` + ExecutionPartConstruct -> OpenMPDeclarativeAllocate + ExecutionPartConstruct -> OpenMPDeclarativeAllocate + ExecutionPartConstruct -> OpenMPExecutableAllocate + ``` + + After rewriting: + ``` + ExecutionPartConstruct -> OpenMPExecutableAllocate + | ExecutionPartConstruct -> OpenMPDeclarativeAllocate + | ExecutionPartConstruct -> OpenMPDeclarativeAllocate + ``` + + Co-authored-by: Isaac Perry + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D148409 + +commit 936d40cdb5c82ca74358b260dc69f7150209a81a +Author: Ethan Luis McDonough +Date: Fri May 5 14:50:14 2023 -0500 + + Revert "[flang] OpenMP allocate directive parse tree fix" + + This reverts commit 5faf45a3d24e603cbc8fe4eb45da386653dae5e5. 
+ Once again arcanist stripped the co-author metadata. I'm going to add it to the revision description and try one last time. + +commit 5faf45a3d24e603cbc8fe4eb45da386653dae5e5 +Author: Ethan Luis McDonough +Date: Fri May 5 14:32:45 2023 -0500 + + [flang] OpenMP allocate directive parse tree fix + + Addresses the same issue as the following abandoned revision: D104391. + + Rewrite leading declarative allocations so they are nested within their respective executable allocate directive + + Original: + ``` + ExecutionPartConstruct -> OpenMPDeclarativeAllocate + ExecutionPartConstruct -> OpenMPDeclarativeAllocate + ExecutionPartConstruct -> OpenMPExecutableAllocate + ``` + + After rewriting: + ``` + ExecutionPartConstruct -> OpenMPExecutableAllocate + | ExecutionPartConstruct -> OpenMPDeclarativeAllocate + | ExecutionPartConstruct -> OpenMPDeclarativeAllocate + ``` + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D148409 + +commit c89959842fe5c1631db60db5dd5994a42810d7e8 +Author: Ethan Luis McDonough +Date: Fri May 5 14:31:01 2023 -0500 + + Revert "[flang] OpenMP allocate directive parse tree fix" + + This reverts commit eaf7d97865140a17f13ad77e5dc0216438127094. + Arcanist stripped co-author data from initial commit. + +commit eaf7d97865140a17f13ad77e5dc0216438127094 +Author: Ethan Luis McDonough +Date: Fri May 5 13:49:45 2023 -0500 + + [flang] OpenMP allocate directive parse tree fix + + Addresses the same issue as the following abandoned revision: D104391. 
+ + Rewrite leading declarative allocations so they are nested within their respective executable allocate directive + + Original: + ``` + ExecutionPartConstruct -> OpenMPDeclarativeAllocate + ExecutionPartConstruct -> OpenMPDeclarativeAllocate + ExecutionPartConstruct -> OpenMPExecutableAllocate + ``` + + After rewriting: + ``` + ExecutionPartConstruct -> OpenMPExecutableAllocate + | ExecutionPartConstruct -> OpenMPDeclarativeAllocate + | ExecutionPartConstruct -> OpenMPDeclarativeAllocate + ``` + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D148409 + +commit ec2c0e0f55637209d1901c73f162dd8625034a56 +Author: Slava Zakharin +Date: Thu May 4 08:47:28 2023 -0700 + + [flang][hlfir] Generate explicit HLFIR type cast for implicit logical<->integer conversion. + + hlfir.assign, in general, ends up calling the Assign runtime that asserts + that the types of LHS and RHS match. In case of implicit logical<->integer + conversions (allowed as an extension) the operands of hlfir.assign + have non-matching types. This change makes sure that the lowering + produces an explicit type cast (either as a scalar fir.convert or + as a hlfir.elemental producing array expression). + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D149765 + +commit 583d492c630655dc0cd57ad167dec03e6c5d211c +Author: Jean Perier +Date: Wed May 3 09:19:12 2023 +0200 + + [flang][hlfir] Lower vector subscripted RHS designators + + Lower vector subscripted designators as values when they appear outside + of the assignment left-hand side and input IO contexts. + + This matches Fortran semantics where vector subscripted designators cannot + be written to outside of the two contexts mentioned above: they are + passed/taken by value where they appear. + + This patch uses the added hlfir.elemental_addr to lower vector designators + in lowering.
But when reaching the end of the designator lowering, the + hlfir.elemental_addr is turned into an hlfir.elemental when lowering is + not asking for the hlfir.elemental_addr. + + This approach allows lowering vector subscripted designators in the same way + while visiting the designator, and only adapting to the context at the + edge. + + The part where lowering uses the hlfir.elemental_addr will be + done in a further patch as it requires lowering assignments in the + new hlfir.region_assign op, and there is no codegen yet for these + new operations. + + Differential Revision: https://reviews.llvm.org/D149480 + +commit af78197857115716802189ef073f83cdac9ede15 +Author: V Donaldson +Date: Wed Apr 12 15:37:19 2023 -0700 + + [flang] Remove `ignoring all compiler directives` warning + + The explicit `ignoring all compiler directives` reminder warning is no + longer accurate. Any similar, more accurate message is best generated + by the front end (change pending). + +commit fd922e6ab0b5324cdf36e2646132d802d3a04ce0 +Author: V Donaldson +Date: Wed Apr 5 11:13:36 2023 -0700 + + [flang] Nonconformant assigned gotos + + Modify code generation for assigned gotos to generate a runtime error + for most cases that violate F90 Clause 8.2.4, rather than treating a + nonconformant GOTO as a nop. For example, generate a runtime error for + a GOTO that attempts to branch to a label for a FORMAT statement. + Relax the requirement that an assigned GOTO with a label list must + branch to a label in the list, and instead allow a branch to any valid + assigned GOTO target in scope. + +commit 04a920b76acf0a52a3eb957c6331ba81a1173e2a +Author: Jean Perier +Date: Mon Apr 3 09:18:41 2023 +0200 + + [flang] preserve pointer rank in polymorphic_pointer => NULL() + + The current lowering for polymorphic pointer association was not + dealing with NULL in a "context aware" fashion: it was calling the + `PointerAssociate` runtime entry point with a fir.box target.
+ But the fir.box is a descriptor for a scalar; this led the + runtime to set the pointer rank to zero, regardless of its actual + rank. + + I do not think there is a way to expose this problem with the Fortran + code currently supported by flang, because most further manipulation of + the pointer would either set the rank correctly, or not rely on the + rank in the runtime descriptor. + + However, this is incorrect, and when assumed-rank entities are supported, the + following would have failed: + + ``` + subroutine check_rank(p) + class(*), pointer :: p(..) + p => null() + select rank(p) + rank (1) + print *, "OK" + rank default + print *, "FAILED" + end select + end subroutine + class(*), pointer :: p(:) + p => null() + call check_rank(p) + end + ``` + + Instead, detect NULL() in polymorphic pointer lowering and trigger the + deallocation of the pointer. + + Differential Revision: https://reviews.llvm.org/D147317 + +commit 5e521580e60a6bf5bf62c19b2028f9f390c5e4a6 +Author: V Donaldson +Date: Fri Mar 31 09:36:16 2023 -0700 + + [flang] IO condition specifier control flow + + Execution of a statement such as + + read(internal,*,err=666,iostat=stat) k + + that terminates with an END or EOR condition must not take the ERR branch. + +commit 6472a2ee363f3b5ac823e471b7ba5582c101a528 +Author: Valentin Clement +Date: Tue Mar 14 16:01:36 2023 +0100 + + [flang] Handle parent component on the LHS of intrinsic assignment + + When the LHS is referring to a parent component the box needs to be + reboxed to the parent component type so the runtime can handle the + assignment correctly.
+ + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D146046 + +commit b07ef9e7cd6f5348df0a4f63e70a60491427ff64 +Author: Renaud-K +Date: Wed Mar 8 18:39:40 2023 -0800 + + Break circular dependency between FIR dialect and utilities + +commit 4f3c98542bebac90546a03363e5956f9862ae985 +Author: Valentin Clement +Date: Mon Mar 6 09:35:36 2023 +0100 + + [flang] Use AssignPolymorphic when LHS is polymorphic + + Make use of the new runtime entry point for assignment to + LHS allocatable polymorphic. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D145324 + +commit 2c1433453d1670f668220670b8f2df60f9dc9949 +Author: V Donaldson +Date: Mon Feb 27 14:05:53 2023 -0800 + + [flang] Block construct + + A block construct is an execution control construct that supports + declaration scopes contained within a parent subprogram scope or another + block scope. (blocks may be nested.) This is implemented by applying + basic scope processing to the block level. + + Name uniquing/mangling is extended to support this. The term "block" is + heavily overloaded in Fortran standards. Prior name uniquing used tag `B` + for common block objects. Existing tag choices were modified to free up `B` + for block construct entities, and `C` for common blocks, and resolve + additional issues with other tags. The "old tag -> new tag" changes can + be summarized as: + + -> B -- block construct -> new + B -> C -- common block + C -> YI -- intrinsic type descriptor; not currently generated + CT -> Y -- nonintrinsic type descriptor; not currently generated + G -> N -- namelist group + L -> -- block data; not needed -> deleted + + Existing name uniquing components consist of a tag followed by a name + from user source code, such as a module, subprogram, or variable name. + Block constructs are different in that they may be anonymous. 
(Like other + constructs, a block may have a `block-construct-name` that can be used + in exit statements, but this name is optional.) So blocks are given a + numeric compiler-generated preorder index starting with `B1`, `B2`, + and so on, on a per-procedure basis. + + Name uniquing is also modified to include component names for all + containing procedures rather than for just the immediate host. This + fixes an existing name clash bug with same-named entities in same-named + host subprograms contained in different-named containing subprograms, + and variations of the bug involving modules and submodules. + + F18 clause 9.7.3.1 (Deallocation of allocatable variables) paragraph 1 + has a requirement that an allocated, unsaved allocatable local variable + must be deallocated on procedure exit. The following paragraph 2 states: + + When a BLOCK construct terminates, any unsaved allocated allocatable + local variable of the construct is deallocated. + + Similarly, F18 clause 7.5.6.3 (When finalization occurs) paragraph 3 + has a requirement that a nonpointer, nonallocatable object must be + finalized on procedure exit. The following paragraph 4 states: + + A nonpointer nonallocatable local variable of a BLOCK construct + is finalized immediately before it would become undefined due to + termination of the BLOCK construct. + + These deallocation and finalization requirements, along with stack + restoration requirements, require knowledge of block exits. In addition + to normal block termination at an end-block-stmt, a block may be + terminated by executing a branching statement that targets a statement + outside of the block. 
This includes + + Single-target branch statements: + - goto + - exit + - cycle + - return + + Bounded multiple-target branch statements: + - arithmetic goto + - IO statement with END, EOR, or ERR specifiers + + Unbounded multiple-target branch statements: + - call with alternate return specs + - computed goto + - assigned goto + + Lowering code is extended to determine if one of these branches exits + one or more relevant blocks or other constructs, and adds a mechanism to + insert any necessary deallocation, finalization, or stack restoration + code at the source of the branch. For a single-target branch it suffices + to generate the exit code just prior to taking the indicated branch. + Each target of a multiple-target branch must be analyzed individually. + Where necessary, the code must first branch to an intermediate basic + block that contains exit code, followed by a branch to the original target + statement. + + This patch implements an `activeConstructStack` construct exit mechanism + that queries a new `activeConstruct` PFT bit to insert stack restoration + code at block exits. It ties in to existing code in ConvertVariable.cpp + routine `instantiateLocal` which has code for finalization, making block + exit finalization on par with subprogram exit finalization. Deallocation + is as yet unimplemented for subprograms or blocks. This may result in + memory leaks for affected objects at either the subprogram or block level. + Deallocation cases can be addressed uniformly for both scopes in a future + patch, presumably with code insertion in routine `instantiateLocal`. + + The exit code mechanism is not limited to block construct exits. It is + also available for use with other constructs. In particular, it is used + to replace custom deallocation code for a select case construct character + selector expression where applicable. This functionality is also added + to select type and associate constructs. 
It is available for use with + other constructs, such as select rank and image control constructs, + if that turns out to be necessary. + + Overlapping nonfunctional changes include eliminating "FIR" from some + routine names and eliminating obsolete spaces in comments. + +commit e5921ef021efa7e696421069d294d66e58df2541 +Author: Jean Perier +Date: Mon Feb 27 09:05:11 2023 +0100 + + [flang][hlfir] Lower associate construct to HLFIR + + - always use genExprAddr when lowering to HLFIR: it does not create + temporary for array sections without vector subscripts, so there is + no need to have custom logic. + + - update mangling to deal with AssocDetailsEntity. Their name is + required in HLFIR so that it can be added to the hlfir.declare + that is created for the selector once it is lowered. This should + allow getting debug info for selector when debug info are generated + from hlfir.declare. + + The rest of associate construct lowering is unchanged and shared with + the current lowering. + + This patch also enables select type lowering to work properly, but some + other todos (mainly about parent component references) prevents porting + the tests for now, so this will be done later. + + Differential Revision: https://reviews.llvm.org/D144740 + +commit 713b3ad43850ad9fc89b89f53ab37b54f744ec70 +Author: Jean Perier +Date: Mon Feb 27 09:04:20 2023 +0100 + + [flang][hlfir] Lower allocatable assignment to HLFIR + + Nothing much to do except set the right attributes on hlfir.assign. + + Differential Revision: https://reviews.llvm.org/D144727 + +commit 18983df02fdfb218a6e46620912ef1d0ebb428e9 +Author: Peter Steinfeld +Date: Wed Feb 22 10:51:54 2023 -0800 + + [Flang] Don't crash when BOZ literals are on the rhs of an assignment + + For BOZ literals, the rhsType will be empty. Check for that before + trying to access its value. 
+ + Differential Revision: https://reviews.llvm.org/D144576 + +commit 33c29a82a9b3b8a0354a5b5bd75b462505602107 +Author: Valentin Clement +Date: Tue Feb 21 10:14:00 2023 +0100 + + [flang] Use runtime Assign when rhs is polymorphic + + Use the runtime when the lhs or rhs is polymorphic. The runtime + deals better with polymorphic entities and aliasing. + + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D144418 + +commit f5cca3c5ce7a1a6d9934e22c60e47ccd1834cf99 +Author: Valentin Clement +Date: Thu Feb 16 20:59:54 2023 +0100 + + [flang] Handle expression in SELECT TYPE selector + + Expressions in selectors were raising an error. In some + cases an expression can be found in a selector. This patch + updates the code to accept expressions and adds a lowering + test. + + Reviewed By: PeteSteinfeld, vdonaldson + + Differential Revision: https://reviews.llvm.org/D144185 + +commit b0de87268a60e9e755b34b2fb505589e01aab14c +Author: Valentin Clement +Date: Thu Feb 16 09:05:12 2023 +0100 + + [flang] Retrieve the correct scope when lowering SELECT TYPE + + Scope to retrieve the associating entity is needed to map the + symbol to the IR value. The scope can be found with source + information. For the type case in SELECT TYPE construct, the source + information is on the Statement. This patch updates + the lowering so the scope for each type guard is retrieved + before the processing. + + Reviewed By: PeteSteinfeld, vdonaldson + + Differential Revision: https://reviews.llvm.org/D144133 + +commit cedfd2721e3492e5ab0ea86d24d8027846687c27 +Author: Jean Perier +Date: Thu Feb 9 09:02:43 2023 +0100 + + [flang][hlfir] Lower procedure designators to HLFIR + + - Add a convertProcedureDesignatorToHLFIR that converts the + fir::ExtendedValue from the current lowering to a + fir.boxproc/tuple mlir::Value.
+ + - Allow fir.boxproc/tuple as hlfir::Entity values + (a function is an address, but from a Fortran entity point of view, + procedures that are not procedure pointers cannot be assigned to, so + it makes a lot more sense to consider those as values). + + - Modify symbol association to not generate an hlfir.declare for dummy + procedures. They are not needed and allowing hlfir.declare to declare + function values would make its verifier and handling overly complex + for little benefit (maybe an hlfir.declare_proc could be added if it + turns out to be useful later for debug info and attributes storing + purposes). + + - Allow translation from hlfir::Entity to fir::ExtendedValue. + convertToBox return type had to be relaxed because some intrinsics + handle both object and procedure arguments and need to lower their + object arguments "asBox". fir::BoxValue is not intended to carry + dummy procedures (all its member functions would make little sense + and its verifier does not accept such type). + Note that AsAddr, AsValue and AsBox will always return the same MLIR + value for procedure designators because they are always handled the + same way in FIR. + + Differential Revision: https://reviews.llvm.org/D143585 + +commit ab9c4e9fff272dd88c92a2d2f3a2e5c66e07e6e2 +Author: Jean Perier +Date: Tue Feb 7 09:22:47 2023 +0100 + + [flang][NFC] addSymbol/lookupSymbol clean-up + + HLFIR requires mapping symbol to a single mlir::Value (produced + by a fir::FortranVariableOpInterface), while the current lowering + maps the value to a fir::ExtendedValue. + + So far, the HLFIR symbol query was a special one. Hence, all the code + directly using symMap.lookupSymbol and symMap.addSymbol did not work + with the lowering to HLFIR.
+ + Refactor the code so that symbol lookup and add symbol go through + the converter in a centralized place that handles the HLFIR case + (translate fir::FortranVariableOpInterface to fir::ExtendedValue + in lookups, and generate hlfir.declare when adding symbols). + + In the refactoring, fir::FortranVariableOpInterface is added as + a symbolBox variant to avoid special casing all lookups (shallowLookup...). + + Remove some unused SymbolBox member functions instead of updating + them. + + Differential Revision: https://reviews.llvm.org/D143395 + +commit dda01632db12d3b11d8e2e21d73d438626cb0436 +Author: Valentin Clement +Date: Tue Feb 7 09:15:54 2023 +0100 + + [flang] Use PointerAssociateLowerBounds when there is lower bounds + + The current code was not taking provided lower bounds when the pointer + is polymorphic and was just calling PointerAssociate. This patch + updates the behavior and uses PointerAssociateLowerBounds with the provided + lower bounds. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D143392 + +commit 3b73fc320f918a12a77e617f162bc7f7111ddfaf +Author: Valentin Clement +Date: Mon Feb 6 21:06:44 2023 +0100 + + [flang] Fix creation of the bound array for pointer remapping + + The runtime function expects a 2 x newRank array and the code + was passing a newRank x 2 array. This patch updates the + creation of the array to fit the runtime expectation. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D143405 + +commit ed8e858a1478d18d321f104a86a579e03ba1886e +Author: Jean Perier +Date: Mon Feb 6 15:14:08 2023 +0100 + + [flang][hlfir] deref pointers before lowering assignment to hlfir.assign + + There is little point not to dereference pointers LHS and RHS + before emitting an hlfir.assign when lowering an assignment. + This pushes complexity and descriptor read side effects that are better + expressed in a load before the assignment.
+ + Differential Revision: https://reviews.llvm.org/D143372 + +commit 7f0074a64a30c448fec2f36d08dffbe64134e84d +Author: Valentin Clement +Date: Fri Feb 3 12:21:59 2023 +0100 + + [flang] Avoid double finalization when intrinsic assignment is done in the runtime + + genRecordAssignment is emitting code to call Assign in the runtime for some cases. + In these cases, the finalization is done by the runtime so we do not need to do it in + a separate call to avoid multiple finalization. + Also refactor the code in Bridge so the actual finalization of allocatable + is done before any reallocation. We might need to push this into ReallocIfNeeded. + It is not clear if the allocatable lhs needs to be finalized in any cases or only if it is + reallocated. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D143186 + +commit 6ada493035efcd1e90e8e062595c478babe7cd18 +Author: Valentin Clement +Date: Fri Feb 3 10:16:54 2023 +0100 + + [flang] Fix potential null scope when lowering dispatch table op + + Similarly to D140209, the scope might need to be retrieved + from the typeSymbol. The test code was crashing because the + scope passed to CollectBindings was initially null. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D143188 + +commit 591e3e6207894ebaee1e47a82fcfb3b246459f76 +Author: Valentin Clement +Date: Wed Feb 1 15:53:52 2023 +0100 + + [flang] Make EndProgramStmt a NOP + early return + + The fix done in D143055 can be made simpler by making EndProgramStmt a NOP + and dealing with the exit in `endNewFunction` in a centralized way. + Also add finalization when there is an early exit in the main + program.
+ + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D143065 + +commit d65aeeb360e279dfc7c9ddf7c8555b585e9df0d4 +Author: Valentin Clement +Date: Wed Feb 1 14:45:53 2023 +0100 + + [flang] Make sure derived-type finalization is done before return + + Finalization needs to be done before the terminator. In case + of end program, this was done after it and trigger a verifier error. + This patch fixes this case. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D143055 + +commit 97492fd1aed56e3d041952914849d95b5ff999af +Author: Valentin Clement +Date: Tue Jan 31 13:46:12 2023 +0100 + + [flang] derived-type finalization + + This patch implements the derived-type finalization for + monomorphic and polymorphic derived-type. + + The finalization is done through a call to the `Destroy` + runtime function so the allocatable component object are also + finalized correctly when needed. It would be possible to finalize + monomorphic derived-type with non finalizable component with a + direct call to their finalize subroutine. + + 7.5.6.3 point 1: LHS nonallocatable object and LHS allocatable + object finalization. Done with call to `Destroy` for monomorphic + derived-type and through `Assign` for polymorphic entities. + + 7.5.6.3 point 2: Done within the deallocation calls. + + 7.5.6.3 point 3: A function context is added to the bridge to + attach finalization that need to happen on function/subroutine + exit. + + 7.5.6.3 point 4: BLOCK construct not yet implemented. + + 7.5.6.3 point 5/6: Finalization attach to the stmtCtx in a + similar way than 9.7.3.2 point 4. + + 7.5.6.3 point 7: INTENT(OUT) finalization done with a + call to `Destroy` runtime function call. + + This patch passes 9/10 tests in the proposed test-suite + https://github.com/llvm/llvm-test-suite/pull/13 + + - The case with BLOCK construct will be implemented later when + BLOCK are implemented upstream. + + - Automatic deallocation is not yet implemented. 
Finalization triggered + by automatic deallocation is then not triggered. + + Reviewed By: jeanPerier, PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D142707 + +commit 3af9dfe464446da8e9acea45681df28b18583370 +Author: Kiran Chandramohan +Date: Wed Jan 25 13:37:54 2023 +0000 + + [Flang][Debug] Use pathnames from location of functions + + This ensures that functions in included files have the correct path + in their file metadata. + + Note: This patch also sets all locations to have the full path names. + + Reviewed By: vzakhari, PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D142263 + +commit 7aa8a9f1abe2e0133febe015ac502737b06828f4 +Author: Valentin Clement +Date: Wed Jan 25 09:17:27 2023 +0100 + + [flang] Fix bounds array creation for pointer remapping calls + + `PointerAssociateRemapping` expect a descriptor holding + a newRank x 2 array of int64. The previous lowering was wrong. + Adapt the lowering to fit the expectation of the runtime. + Use the `bounds` to get the rank. + + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D142487 + +commit 7531c87183822cf8931496a757a09779e24aeac0 +Author: Jean Perier +Date: Fri Jan 20 14:05:42 2023 +0100 + + [flang][hlfir] Enable allocate, deallocate, pointer assignment lowering + + The previous patches allowed lowering allocatable/and pointer designator + expressions with HLFIR. + This patch updates the bridge genExprMutableBox to use HLFIR lowering + when HLFIR flag is set. For allocate and deallocate lowering that use + genExprMutableBox, no other change is needed. + + For pointer assignments, the code doing the pointer assignments in the + bridge can be reused and is simply moved so that it can be shared, and + the "explicit context" special cases of the previous lowering are + by-passed. 
+ + The code doing pointer assignment revealed that convertExprToAddress + did not match the previous genExprAddr behavior (that actually + does not create temps for "x" where x is not contiguous). + Instead of trying to copy the old behavior that is a bit weird (was + dictated by the implementation rather than design), update + convertExprToAddress to do something sensible and that works with + the current genExprAddr usages (if anything, it should save bogus + array section temps). + + Differential Revision: https://reviews.llvm.org/D142197 + +commit eef0210706bc16ffde17d5b574799d8aae705790 +Author: Kiran Chandramohan +Date: Thu Jan 19 16:49:26 2023 +0000 + + [Flang][Debug] Modifications for getting pathname + + -> Use file pathname from the Flang frontend. It is the frontend + that is in charge of finding the files and is hence the canonical + source for paths. + -> Convert pathname to absolute pathname while creating the moduleOp. + + Co-authored-by: Peter Klausler + + Reviewed By: PeteSteinfeld, vzakhari, jeanPerier, awarzynski + + Differential Revision: https://reviews.llvm.org/D141674 + +commit a459a2485b54fbd9e1f8a48061e79cdcd12b12a5 +Author: Valentin Clement +Date: Thu Jan 19 17:32:02 2023 +0100 + + [flang] Fix SELECT TYPE lowering when CLASS DEFAULT is not the last type guard + + CLASS DEFAULT needs to be the last attribute when fir.select_type op is created. + It needs to be at its actual position in the Fortran code when the TypeGuardStmt + are processed. The current lowering was crashing when CLASS DEFAULT was not at + the last position. + This patch fixes the issue by tracking the actual position of the CLASS DEFAULT + type guard and setting it at the correct position after the fir.select_type op + is created.
+ + Reviewed By: jeanPerier, PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D142091 + +commit 91682b2631b224a9f6dca9512b5e0951cc4a7762 +Author: Kazu Hirata +Date: Sat Jan 14 14:06:18 2023 -0800 + + Remove redundant initialization of std::optional (NFC) + +commit 199e49746db83f1e56d5899f1905784bbfa142e3 +Author: Jean Perier +Date: Fri Jan 13 09:15:52 2023 +0100 + + [flang] Lower elemental intrinsics to hlfir.elemental + + - Move the core code generating hlfir.elemental for user calls from + genUserElementalCall into a new ElementalCallBuilder class and use + C++ CRTP (curiously recurring template pattern) to implement the + parts specific to user and intrinsic call into ElementalUserCallBuilder + and ElementalIntrinsicCallBuilder. This allows sharing the core logic + to lower elemental procedures for both user defined and intrinsic + procedures. + + - To allow using ElementalCallBuilder, split the intrinsic lowering code + into two parts: first lower the arguments to hlfir::Entity regardless + of the interface of the intrinsics, and then, in a different function + (genIntrinsicProcRefCore), prepare the hlfir::Entity according to the + interface. This allows using the same core logic to prepare "normal" + arguments for non-elemental intrinsics, and to prepare the elements of + array arguments inside elemental call (ElementalIntrinsicCallBuilder + calls genIntrinsicProcRefCore once it has computed the scalar actual + arguments). + To allow this split, genExprBox/genExprAddr/genExprValue logic had to + be split in ConvertExprToHlfir.[cpp/h]. + + - Add missing statement context pushScope/finalizeAndPop around the + code generation inside the hlfir.elemental so that any temps created + while lowering the call at the element level are correctly cleaned up. + + - One piece of code in hlfir::Entity::hasNonDefaultLowerBounds() was wrong for assumed shape arrays (returned true when an assumed shape array had no explicit lower bounds).
This caused the added test to hit a bogus TODO, so fix it. + + Elemental intrinsics returning are still TODO (e.g., adjustl). I will implement this in a next patch, this one is big enough. + + Differential Revision: https://reviews.llvm.org/D141612 + +commit 87e547d8f035c8de321e246629dd2b1ccec31662 +Author: Kiran Chandramohan +Date: Thu Jan 12 10:34:34 2023 +0000 + + [Flang] Add/Restore basic debug support (1/n) + + Recent changes to MLIR meant that Flang does not generate any debug line + table information. + + This patch adds a pass that provides some foundation work with which + basic line table debug info can be generated. A walk is performed on + all the `func` ops in the module and they are decorated with a fusedLoc + op that contains the debug metadata for the subroutine along with + location information. + + Alternatives include populating this info during lowering or during FIR + to LLVM Dialect conversion. + + Note: Patches in future will add + -> more realistic debug info for types and other fields. + -> driver flags to control generation of debug. + + Fixes #58634. + + Reviewed By: awarzynski, vzakhari + + Differential Revision: https://reviews.llvm.org/D137956 + +commit 4e78f88561af26c74b4b7fa2a017cd836a9f9bf4 +Author: Jean Perier +Date: Tue Jan 10 09:28:08 2023 +0100 + + [flang] Lower addresses inside global initializers in HLFIR + + Move the code to lower an expression to address or a box in HLFIR from + Bridge.cpp to ConvertExpr.cpp so that it can be used inside + ConvertVariable.cpp (that needs to use a different symbol map that the + one held in the bridge). + + Lower NULL to hlfir.null. + + This allows lowering derived type constant structure constructors with + pointer components into fir.global. 
+ + Differential Revision: https://reviews.llvm.org/D141276 + +commit c09215860fd5c32012ef4fdc5a001485a04fe85a +Author: Kazu Hirata +Date: Sat Jan 7 22:26:48 2023 -0800 + + [flang] Use std::optional instead of llvm::Optional (NFC) + + This patch replaces (llvm::|)Optional< with std::optional<. I'll post + a separate patch to remove #include "llvm/ADT/Optional.h". + + This is part of an effort to migrate from llvm::Optional to + std::optional: + + https://discourse.llvm.org/t/deprecating-llvm-optional-x-hasvalue-getvalue-getvalueor/63716 + +commit 4d4d4785e00824b8f4824d09126547379d5a2093 +Author: Kazu Hirata +Date: Sat Jan 7 20:55:47 2023 -0800 + + [flang] Add #include <optional> (NFC) + + This patch adds #include <optional> to those files containing + llvm::Optional<...> or Optional<...>. + + I'll post a separate patch to actually replace llvm::Optional with + std::optional. + + This is part of an effort to migrate from llvm::Optional to + std::optional: + + https://discourse.llvm.org/t/deprecating-llvm-optional-x-hasvalue-getvalue-getvalueor/63716 + +commit 609b789170625277f631139c790c22d527ff1eed +Author: V Donaldson +Date: Tue Jan 3 10:31:30 2023 -0800 + + [flang] Control flow graph issues + + Address several issues involving control flow graph generation and + structured code ops. + + - Fix a problem with constructs nested inside unstructured selection + constructs. This is a general problem involving branches that are + implied rather than explicit. It is addressed in the generic genFIR + "wrapper" function that calls individual statement-specific genFIR calls. + + - The previous fix requires some compensating changes in IF and DO + construct code lowering. + + - Streamline the code to generate explicit DO loop variable updates. + + - Fix a problem with the individual detailed genFIR calls made in the + genFIR(SelectTypeConstruct) call. + + - Modify control flow graph generation to support the insertion of + deallocation and finalization code when lowering most END + statements.
+ +commit a8234196c58396c0505ac93983dafee743a67b11 +Author: Peter Klausler +Date: Mon Dec 19 12:41:25 2022 -0800 + + [flang] Restore checking for some optional values before use + + Recent commits (2098ad7f00324ee0f2a6538f418a6f81dfdd2edb and + 15a9a72ee68166c0cff3f036cacd3c82be66c729) replaced usage of "o.value()" + on optionals with "*o". Those optional values are expected to be + present -- but now, if it ever turns out that they're not, + compilation will proceed with garbage data rather than crashing + immediately (and more debuggably) with an uncaught exception. + + Add asserts for presence to restore the previous level of safety. + (I could have reverted these patches so as to resume use of .value() + but I didn't want to just have them get broken again.) + + Differential Revision: https://reviews.llvm.org/D140340 + +commit 93129ca8d1cf618390a16e5d4315d0fd15170c51 +Author: Jean Perier +Date: Tue Dec 20 13:49:38 2022 +0100 + + [flang] Do not convey captured globals through host link + + Addresses and properties (bounds, length parameters) of host + variables associated in an internal procedure were all passed via + an extra tuple argument of the internal procedure. + This extra tuple is in general an overhead: it must be created and + passed, and requires creating thunks when taking the address of the + internal procedure. + This patch allows not using the tuple for host global variables + (from modules, common block, or local saved variables) since they can + be instantiated from the fir.global symbol in the internal procedure + instead. + Add a fir.internal_proc attribute to mlir::FuncOp for internal procedures + so that ArrayValueCopy can still detect internal procedures even if they + do not have a tuple argument.
+ + Differential Revision: https://reviews.llvm.org/D140288 + +commit fc61400cb81200198cf5b1d53cff2d29f5909800 +Author: Jean Perier +Date: Mon Dec 19 02:40:25 2022 -0800 + + [flang] Fix llvm::Optional warning caused by D140220 + + Using llvm::Optional::value() was just deprecated in LLVM. + Remove the usage that was added by D140220 and replace it by an assert. + + https://lab.llvm.org/buildbot/#/builders/160/builds/14222 + +commit 8febe67851458645f93efa33d72717b732007ca7 +Author: Jean Perier +Date: Mon Dec 19 11:11:23 2022 +0100 + + [flang] Lower statement function references in HLFIR + + Enable lowering of statement function references in HLFIR. This follows + the same principle as statement function lowering with the current + lowering: + - Actual arguments are lowered and mapped to the statement function + dummy symbols. + - "HostAssociated" symbols are mapped to their host values (these are + the symbols referred to inside the statement function expressions that + are not statement function dummies. e.g: `x` in `stmt_func(i) = + x(i)`). + - The statement function expression is evaluated. + + evaluate::SetLength has to be lowered to deal with statement functions + returning characters since the front-end is generating one to ensure the + statement function expression value is trimmed/padded to match the statement + function declared type. + + Differential Revision: https://reviews.llvm.org/D140220 + +commit 15a9a72ee68166c0cff3f036cacd3c82be66c729 +Author: Fangrui Song +Date: Sat Dec 17 22:22:47 2022 +0000 + + [flang] llvm::Optional::value() => => operator*/operator-> + + std::optional::value() has undesired exception checking semantics and is + unavailable in older Xcode (see _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS). The + call sites block std::optional migration. 
+ +commit 9379ca0a257780961a7e77c1a56c70d00cd85909 +Author: Valentin Clement +Date: Thu Dec 15 12:02:11 2022 +0100 + + [flang] Fix associating entity when selector is an array, pointer or allocatable + + In SELECT TYPE, within the block following TYPE IS, the associating entity is not polymorphic. + It has the type named in the type guard and other properties taken from the + selector. Within the block following a CLASS IS type guard statement, the + associating entity is polymorphic and has the declared type named in the type + guard statement. + This patch makes sure the associating entity matches the selector if it is + an array, a pointer or an allocatable. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D140017 + +commit 518e6f12f37cc47be99c6d218bf07c0191c66de2 +Author: V Donaldson +Date: Mon Dec 12 14:20:06 2022 -0800 + + [flang] Submodules + + A submodule is a program unit that may contain the implementations of procedures + declared in an ancestor module or submodule. + + Processing for the equivalence groups and variables declared in a submodule + scope is similar to existing processing for the equivalence groups and + variables in module and procedure scopes. However, module and procedure scopes + are tied directly to code in the Pre-FIR Tree (PFT), whereas processing for a + submodule must have access to an ancestor module scope that is guaranteed + to be present in a .mod file, but is not guaranteed to be in the PFT. This + difference is accommodated by tying processing directly to a front end scope. + Function scopes that can be processed on the fly are done that way; the + resulting variable information is never stored. Module and submodule scopes + whose symbol information may be needed during lowering of any number of module + procedures are instead cached on first use, and reused as needed. + + These changes are a direct extension of current code.
All module and submodule + variables in scope are processed, whether referenced or not. A possible + alternative would be to instead process symbols only when first used. While + this could ultimately be beneficial, such an approach must account for the + presence of equivalence groups. That information is not currently available + for on-the-fly variable processing. + + Some additional changes are needed to include submodules in places where + modules must be considered, and to include separate module procedures in + places where other subprogram variants are considered. There is also a fix + for a bug involving the use of variables in an equivalence group in a + namelist group, which also involves scope processing code. + +commit 40cb4fd0b9c9088de79435d829314fcd32ba2779 +Author: Valentin Clement +Date: Mon Dec 12 14:04:17 2022 +0100 + + [flang] Perform polymorphic pointer association with runtime call + + Pointer association to a polymorphic pointer needs to potentially + update the element size in the descriptor. Update the pointer association + to polymorphic pointer with a runtime call to PointerAssociate. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D139825 + +commit 788960d6286325c59d1e8e8e85743dafd4a61476 +Author: Jean Perier +Date: Tue Dec 6 13:53:08 2022 +0100 + + [flang] Allow conversion from hlfir.expr to fir::ExtendedValue + + For now at least, the plan is to keep hlfir.expr usage limited as + sub-expression operand, assignment rhs, and a few other contexts + (e.g. Associate statements). The rest of lowering (statements lowering + in the bridge) will still expect to get and manipulate characters and + arrays in memory. That means that hlfir.expr must be converted to + variable in converter.genExprAddr/converter.genExprBox. + + This is done using an hlfir.associate, and generating the related + hlfir.end_associate in the statement context.
+ + hlfir::getFirBase is updated to avoid bringing in the HLFIR + fir.boxchar/fir.box into FIR when the entity was created with + hlfir::AssociateOp. + + Differential Revision: https://reviews.llvm.org/D139328 + +commit 491b6a9ccb05e5e6784ce50718570d204378c75e +Author: Valentin Clement +Date: Mon Dec 5 18:28:13 2022 +0100 + + [flang] Fix pointer association with remap on polymorphic entities + + Runtime is expecting a 1d array. This patch fixes the generation + of the array holding the bounds to be passed to the runtime function call. + + Reviewed By: jeanPerier, PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D139324 + +commit 42b21ddaadd3545945f29a8ccdcc89779542c30e +Author: Valentin Clement +Date: Mon Dec 5 09:35:50 2022 +0100 + + [flang] Pointer assignment with remapping involving polymorphic entities + + Lower pointer assignment with remapping involving polymorphic entities + to runtime call to PointerAssociateRemapping. + For the time being all pointer assignment involving polymorphic entities are + done with the runtime call. When lhs is not unlimited polymorphic + we might be able to do it inlined as well. + + Reviewed By: jeanPerier, PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D139198 + +commit 19811165482d87582d31219305f946a77208a1f2 +Author: Jean Perier +Date: Mon Dec 5 09:05:37 2022 +0100 + + [flang] Lower function return to HLFIR + + The only special thing that is needed is to update the bridge symbol + lookup to deal with the HLFIR symbol lookup (symbols are mapped to + fir::FortranVariableInterface operations, not Fortran::Lower::SymbolBox). + + Differential Revision: https://reviews.llvm.org/D139201 + +commit 9a41739565d9f7ce94da5e7d83947ead73d9bd54 +Author: Kazu Hirata +Date: Sat Dec 3 12:14:21 2022 -0800 + + [flang] Use std::nullopt instead of None (NFC) + + This patch mechanically replaces None with std::nullopt where the + compiler would warn if None were deprecated.
The intent is to reduce + the amount of manual work required in migrating from Optional to + std::optional. + + This is part of an effort to migrate from llvm::Optional to + std::optional: + + https://discourse.llvm.org/t/deprecating-llvm-optional-x-hasvalue-getvalue-getvalueor/63716 + +commit f8ea349a6d4b71e04f0eff637ee1a71bb15c8aa1 +Author: Valentin Clement +Date: Fri Dec 2 15:51:01 2022 +0100 + + [flang] Perform assignment to polymorphic allocatable with runtime call + + Lower assignment to polymorphic allocatable to the `Assign` runtime + call. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D139192 + +commit c44292f15b0ce0fa1866c80211b341733b043efb +Author: Valentin Clement +Date: Fri Dec 2 09:52:06 2022 +0100 + + [flang] Enable character type guard in select type + + SELECT TYPE lowering and conversion were not handling + `character` type guard. This adds support for it. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D139106 + +commit e78e4a176147a1a971f2093b3a927f51479074ab +Author: Jean Perier +Date: Thu Dec 1 11:09:35 2022 +0100 + + [flang] lower F77 calls in HLFIR + + Use recently added hlfir.associate/hlfir.end_associate to deal + with the cases where the actual argument is an expression. + + Differential Revision: https://reviews.llvm.org/D139009 + +commit 131585ab0fd0ed43d6fd7325ff75d6fa6e623a4b +Author: Valentin Clement +Date: Thu Dec 1 11:12:01 2022 +0100 + + [flang] Use genExprBox for the rhs when calling PointerAssociate for unlimited polymorphic pointer + + In D139019 the assumption was made that the rhs was also the MutableBox + but this is not a constraint. Use genExprBox instead. Also the allowed + conversion in D139019 was not correct. Removed it since it is not needed anymore.
+ + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D139081 + +commit abefd87e706a47303905edcff031a22edf880921 +Author: Valentin Clement +Date: Wed Nov 30 18:57:28 2022 +0100 + + [flang] Delegate pointer association to class(*) pointer to the runtime + + Pointer association with an unlimited polymorphic pointer on the lhs + requires more than just updating the base_addr. Delegate the association to + the runtime function `PointerAssociate`. + + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D139019 + +commit 1bd0ff7a90593d3cf363325ff797bc5efa7928e0 +Author: Valentin Clement +Date: Wed Nov 30 15:53:01 2022 +0100 + + [flang] Allow non polymorphic pointer assignment with polymorphic rhs + + Remove the TODO and allow pointer assignment with non + polymorphic entity on the lhs. The assignment follows the same scheme + as derived-type pointer assignment to parent component. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D138998 + +commit d38735e601d97d5dad4e6c7e7452632d5954f652 +Author: Valentin Clement +Date: Tue Nov 22 15:13:18 2022 +0100 + + [flang][NFC] Switch CollectBindings return to SymbolVector + + As suggested on D138129, switching return of CollectBindings + function to SymbolVector. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D138419 + +commit 81bd5e2ef7332f8ccad5ffeef931ed7d87d548e7 +Author: Valentin Clement +Date: Tue Nov 22 10:11:50 2022 +0100 + + Revert "[flang][NFC] Switch CollectBindings return to SymbolVector" + + This reverts commit 97e8eeb758fcae4f2afd9ac516ffc9509b4daaf0. + +commit 97e8eeb758fcae4f2afd9ac516ffc9509b4daaf0 +Author: Valentin Clement +Date: Tue Nov 22 09:42:32 2022 +0100 + + [flang][NFC] Switch CollectBindings return to SymbolVector + + As suggested on D138129, switching return of CollectBindings + function to SymbolVector.
+ + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D138419 + +commit 6393d2ea24fb458c353f8d453ab5f20663875cb1 +Author: Valentin Clement +Date: Thu Nov 17 10:53:13 2022 +0100 + + [flang] Create fir.dispatch_table and fir.dt_entry operations + + Create the fir.dispatch_table operation based on semantics + information. The fir.dispatch_table will be used for static devirtualization + as well as for fir.select_type conversion. + + Depends on D138129 + + Reviewed By: jeanPerier, PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D138131 + +commit dd73bfa6d6da9435cef8fc1e759abd5fad32fa50 +Author: Jean Perier +Date: Tue Nov 15 12:01:21 2022 +0100 + + [flang] Lower intrinsic assignment to fir.assign + + Lower intrinsic assignment to hlfir.assign, except when the LHS + is a whole allocatable (this part will be done later to keep patch + simpler). + + Differential Revision: https://reviews.llvm.org/D138013 + +commit f677c5ee97911561c9948684029aef15b1f5cdd0 +Author: Valentin Clement +Date: Mon Nov 14 10:46:53 2022 +0100 + + [flang] Initial lowering of SELECT TYPE construct to fir.select_type operation + + This patch is the initial path to lower the SELECT TYPE construct to the + fir.select_type operation. More work is required in the AssocEntity + mapping but it will be done in a follow up patch to ease the review. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D137728 + +commit fcfb620db55ec6fde832254f38a207da24399a2d +Author: Jean Perier +Date: Mon Nov 14 10:37:04 2022 +0100 + + [flang][NFC] rename hlfir::FortranEntity into EntityWithAttributes + + This reflects the fact that Attributes will not always be visible when + looking at an HLFIR variable. The EntityWithAttributes class is used + to denote in the compiler code that the value at hand has visible + attributes. It is intended to be used in lowering so that the code + can query about operands attributes when generating code. 
+ + Differential Revision: https://reviews.llvm.org/D137792 + +commit 8f3f15c1a208932689a8bdef22d6ca3d4c3408c5 +Author: Slava Zakharin +Date: Mon Nov 7 09:05:27 2022 -0800 + + [flang] Configure FirOpBuilder based on math driver options. + + Added MathOptionsBase to share fastmath config between different + components. Frontend driver translates LangOptions into MathOptionsBase. + FirConverter configures FirOpBuilder using MathOptionsBase + config passed to it via LoweringOptions. + + Depends on D137390 + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D137391 + +commit 3952377f71dc1d1aa1627c4c7f82d51163b8fa80 +Author: Jean Perier +Date: Wed Nov 2 08:36:45 2022 +0100 + + [flang] lower intrinsic constants to HLFIR + + Use the utility to lower Constant that was split from current lowering + in https://reviews.llvm.org/D136955. + + The difference in HLFIR is the addition of a fir.declare on constant + outlined in memory so that all the information about them is available. + + Lowering to HLFIR is enabled in Bridge::genExprValue to allow testing + of scalar constant lowering. + + Differential Revision: https://reviews.llvm.org/D137084 + +commit 880b37f175c7f7ce9e5684ecb2713de66f79cec7 +Author: Valentin Clement +Date: Tue Nov 1 21:46:07 2022 +0100 + + [flang] Handle pointer assignment with polymorphic entities + + This patch forces pointer and allocatable polymorphic entities to be + tracked as descriptor. It also enables the pointer assignment between + polymorphic entities. Pointer association between a non-polymorphic + pointer and a polymorphic target might require some more work as + per 10.2.2.3 point 1.
+ + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D137150 + +commit 90e9fcbb68a3afa3ac25aa54555355634554c349 +Author: Valentin Clement +Date: Mon Oct 31 11:02:50 2022 +0100 + + [flang] Set declared type when NULLIFY a polymorphic pointer + + Fortran standard 7.3.2.3 point 7 mentions that a disassociated + pointer's dynamic type is its declared type. + In 9.7.2 note 1, when a NULLIFY statement is applied to a polymorphic pointer, + its dynamic type becomes the same as its declared type. + This patch enforces these standard points by calling the runtime function + `PointerNullifyDerived` with the declared type descriptor. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D136948 + +commit c14ef2d762e4f7de2d892af111d87aec79b8cd6f +Author: Jean Perier +Date: Mon Oct 24 15:35:19 2022 +0200 + + [flang] Add kernel to lower expressions to HLFIR + + This patch adds the kernel to lower evaluate::Expr to HLFIR to a + hlfir::FortranEntity (a single mlir::Value that can be interpreted as + a Fortran variable or the value of a Fortran expression). + + It implements lowering of simple name designators ("x") and starts + adding a translation layer in AbstractConverter::genExprBox and + AbstractConverter::genExprAddr so that the new expression lowering + can be used without any changes for now in the current statement and + construct lowering. + + Differential Revision: https://reviews.llvm.org/D136453 + +commit 9e37301cf483237695325e199393ba5a84b7fc1e +Author: Jean Perier +Date: Tue Oct 18 11:07:47 2022 +0200 + + [flang][NFC] Simplify mapSymbolAttributes in symbol lowering + + mapSymbolAttributes currently has a lot of very similar code for + each kind of explicit shape and scalar symbols. + + Refactor it so that the change to lower symbols with fir.declare + can be added in centralized places instead of being scattered. + This is a preparation patch and fir.declare is not yet added.
+ + Differential Revision: https://reviews.llvm.org/D136061 + +commit 205b47401ed6517cc34e473838e1d307715cc9a4 +Author: Peixin Qiao +Date: Mon Oct 17 23:27:17 2022 +0800 + + [flang] Fix the trivial type passed as value with bind(C) + + In the callee side, the value cannot be used directly. For example, the + dummy argument is lhs variable or the dummy argument is passed to + another procedure as actual argument. + + Fix this by allocating one temporary storage and store the value. Then + map the symbol of dummy argument to the `mlir::Value` of the temporary. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D136009 + +commit 4546397e39589f0a6a707218349d1bf65fe54645 +Author: Jean Perier +Date: Mon Oct 17 09:57:16 2022 +0200 + + [flang] Introduce option to lower expression to HLFIR + + Preliminary work on HLFIR. Introduce option that will allow testing + lowering via HLFIR until this is ready to replace the current expression + lowering. + + See https://reviews.llvm.org/D134285 for more context about the plan. + + Differential Revision: https://reviews.llvm.org/D135959 + +commit 0ec3ac9b7fbd15698af7289e1214e8ff3d82ec14 +Author: Jonathon Penix +Date: Tue Jul 19 11:47:25 2022 -0700 + + [Flang] Add -fconvert option to swap endianness for unformatted files. + + To accomplish this, this patch creates an optional list of environment + variable default values to be set by the runtime to allow directly using + the existing runtime implementation of FORT_CONVERT for I/O conversions. + +commit f4accbf55f4d0fcd6d7cc6f7632a0e4b69c9f3dd +Author: Peixin Qiao +Date: Wed Oct 5 20:22:33 2022 +0800 + + [flang][OpenMP] Support privatization for single construct + + This supports the lowering of private and firstprivate clauses in single + construct. The alloca ops are emitted in the entry block according to + https://llvm.org/docs/Frontend/PerformanceTips.html#use-of-allocas, and + the load/store ops are emitted in the single region. 
The data race + problem is handled in OMPIRBuilder. That is, the barrier is emitted in + OMPIRBuilder. + + Co-authored-by: Nimish Mishra + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D128596 + +commit de3efd1b4c8e120c37b24e7cc264b5a117641bb1 +Author: Valentin Clement +Date: Sat Sep 24 08:58:50 2022 +0200 + + [flang] Lower character result of bind(c) function by value + + BIND(C) Function returning character must return it by value and + not as hidden argument like done currently. This patch updates the + code to return it by value for both use cases. + + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D134530 + +commit e6238ab52590a4f31bc8c6e806c7947e9ef04b57 +Author: Jean Perier +Date: Tue Sep 20 10:39:39 2022 +0200 + + [flang] Deallocate WHERE masks after all assignments + + Allocatable assignments were triggering lowering to clean up + any WHERE mask temporaries, even if some assignments were left + in the WHERE construct. + + This is because allocatable assignments lowering was being passed the + wrong statement context. Fix this by selecting the where/forall statement + context instead of a local one when there is one. + + Differential Revision: https://reviews.llvm.org/D134197 + +commit 78c40b3c53ccfa272528d7d4f03d5f25d0b7806e +Author: V Donaldson +Date: Wed Sep 7 21:22:59 2022 -0700 + + [flang] Control flow with empty select case blocks + + Fix control flow for empty select case blocks such as: + + select case (2) + case (1) + print*, '1' + case (2) + ! print*, '2' + case default + print*, 'default' + end select + +commit 109f9a291850a8e82f5026f68a382222a235c4f3 +Author: Peixin Qiao +Date: Tue Sep 6 08:15:01 2022 +0800 + + [flang] Support lowering of intrinsic module procedure C_F_POINTER + + As Fortran 2018 18.2.3.3, the intrinsic module procedure + C_F_POINTER(CPTR, FPTR [, SHAPE]) associates a data pointer with the + target of a C pointer and specifies its shape.
CPTR shall be a scalar of + type C_PTR, and its value is the C address or the result of a reference + to C_LOC. FPTR is one pointer, either scalar or array. SHAPE is a + rank-one integer array, and it shall be present if and only if FPTR is + an array. + + C_PTR is the derived type with only one component of integer 64, and the + integer 64 component value is the address. Build the right "source" + fir::ExtendedValue based on the address and shape, and use + associateMutableBox to associate the pointer with the target of the C + pointer. + + Refactor getting the address of C_PTR to reuse the code. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D132303 + +commit 8fbc7e0869293b9b61b7bce3edfa4de05b1c549d +Author: Slava Zakharin +Date: Thu Sep 1 12:33:39 2022 -0700 + + [flang] Make use of do variable more consistent. + + Instead of using the IV block argument of the do-loop we will use + the do-variable value loaded from its location. This usage is consistent + with other uses of the do-variable inside the loop. + + Differential Revision: https://reviews.llvm.org/D133140 + +commit 4943dbdf67bad8ddb6dbb6e31e4ce9a80ffd9097 +Author: Peixin Qiao +Date: Mon Aug 29 22:29:34 2022 +0800 + + [flang] Support lowering of C_PTR and C_FUNPTR argument with VALUE attribute + + As Fortran 2018 18.3.2, C_PTR is interoperable with any C object pointer + type. C_FUNPTR is interoperable with any C function pointer type. As + 18.3.6, a C pointer can correspond to a Fortran dummy argument of type + C_PTR with the VALUE attribute. + + The interface for type(C_PTR)/type(C_FUNPTR) argument with value + attribute is different from the usual derived type. For type(C_PTR) + or type(C_FUNPTR), the component is the address, and the interface is + a pointer even with VALUE attribute. For a usual derived type such as + the derived type with the component of integer 64, the interface is an i64 + value when it has VALUE attribute on aarch64 linux.
+ + To lower the type(C_PTR)/type(C_FUNPTR) argument with value attribute, + get the value of the component of the type(C_PTR)/type(C_FUNPTR), which + is the address, and then convert it to the pointer and pass it. + + Reviewed By: Jean Perier + + Differential Revision: https://reviews.llvm.org/D131583 + +commit af7edf1557d87026bb4dd4783f60e766538e923c +Author: Slava Zakharin +Date: Thu Aug 18 14:06:19 2022 -0700 + + [flang] Keep original data type for do-variable value. + + Keep the original data type of integer do-variables + for structured loops. When do-variable's data type + is an integer type shorter than IndexType, processing + the do-variable separately from the DoLoop's iteration index + allows getting rid of type casts, which can make backend + optimizations easier. + + For example, + ``` + do i = 2, n-1 + do j = 2, n-1 + ... = a(j-1, i) + end do + end do + ``` + + If value of 'j' is computed by casting the DoLoop's iteration + index to 'i32', then Flang will produce the following LLVM IR: + ``` + %1 = trunc i64 %iter_index to i32 + %2 = sub i32 %1, 1 + %3 = sext i32 %2 to i64 + ``` + + LLVM's InstCombine may try to get rid of the sign extension, + and may transform this into: + ``` + %1 = shl i64 %iter_index, 32 + %2 = add i64 %1, -4294967296 + %3 = ashr exact i64 %2, 32 + ``` + + The extra computations for the element address applied on top + of this awkward pattern confuse LLVM vectorizer so that + it does not recognize the unit-strided access of 'a'. 
+ + Measured performance improvements on `SPEC CPU2000@IceLake`: + ``` + 168.wupwise: 11.96% + 171.swim: 11.22% + 172.mrgid: 56.38% + 178.galgel: 7.29% + 301.apsi: 8.32% + ``` + + Differential Revision: https://reviews.llvm.org/D132176 + +commit 06b551c944ff1cb4a21ca39c9e5ee6f67fc282ee +Author: Kazu Hirata +Date: Sat Aug 20 21:18:27 2022 -0700 + + Use llvm::is_contained (NFC) + +commit 83fa97567949ce16d58b62ecff48930efb1d80bb +Author: Valentin Clement +Date: Sat Aug 13 20:40:03 2022 +0200 + + [flang][openacc] Handle array section and derived-type components operands + + This patch lowers correctly operands with array section + and derived-type component. + + Depends on D131764 + + Reviewed By: razvanlupusoru + + Differential Revision: https://reviews.llvm.org/D131765 + +commit 8fc00247cea5ac8290657867581e22b876a09d3b +Author: Valentin Clement +Date: Fri Aug 12 21:22:30 2022 +0200 + + [flang] Pass SemanticsContext to the LoweringBridge + + The SemanticsContext is needed to analyze expression later in the + lowering for directive languages. This patch allows to keep a reference of + the SemanticsContext in the LoweringBridge. + + Building block for D131765 + + Reviewed By: razvanlupusoru + + Differential Revision: https://reviews.llvm.org/D131764 + +commit 435feefbdd6c91faf24fa5e69c4e7c3bc127568a +Author: Nimish Mishra +Date: Fri Aug 12 16:46:26 2022 +0530 + + [flang][OpenMP] Lowering support for default clause + + This patch adds lowering support for default clause. + + 1. During symbol resolution in semantics, should the enclosing context + have a default data sharing clause defined and a `parser::Name` is not + attached to an explicit data sharing clause, the + `semantics::Symbol::Flag::OmpPrivate` flag (in case of + default(private)) and `semantics::Symbol::Flag::OmpFirstprivate` flag + (in case of default(firstprivate)) is added to the symbol. + + 2. 
During lowering, all symbols having either + `semantics::Symbol::Flag::OmpPrivate` or + `semantics::Symbol::Flag::OmpFirstprivate` flag are collected and + privatised appropriately. + + Co-authored-by: Peixin Qiao + + Reviewed by: peixin + + Differential Revision: https://reviews.llvm.org/D123930 + +commit 30b779d515e6aa9516f47d8c0ca54b11aaf98cb2 +Author: Nimish Mishra +Date: Fri Aug 12 16:45:38 2022 +0530 + + Revert "[flang][OpenMP] Lowering support for default clause" + + This reverts commit 6a305c9b49dd28eaeae694fce5755e279fbc884c. + +commit 6a305c9b49dd28eaeae694fce5755e279fbc884c +Author: Nimish Mishra +Date: Fri Aug 12 22:05:43 2022 +0530 + + [flang][OpenMP] Lowering support for default clause + + This patch adds lowering support for default clause. + + 1. During symbol resolution in semantics, should the enclosing context have + a default data sharing clause defined and a `parser::Name` is not attached + to an explicit data sharing clause, the + `semantics::Symbol::Flag::OmpPrivate` flag (in case of default(private)) + and `semantics::Symbol::Flag::OmpFirstprivate` flag (in case of + default(firstprivate)) is added to the symbol. + + 2. During lowering, all symbols having either + `semantics::Symbol::Flag::OmpPrivate` or + `semantics::Symbol::Flag::OmpFirstprivate` flag are collected and + privatised appropriately. + + Co-authored-by: Peixin Qiao + + Reviewed by: peixin + + Differential Revision: https://reviews.llvm.org/D123930 + +commit f1eb945f9a5037b1fac6da02405047b24c0c2de5 +Author: Slava Zakharin +Date: Tue Jul 19 20:39:58 2022 -0700 + + [flang] Propagate lowering options from driver. + + This commit addresses concerns raised in D129497. + + Propagate lowering options from driver to expressions lowering + via AbstractConverter instance. A single use case so far is + using optimized TRANSPOSE lowering with O1/O2/O3. + + bbc does not support optimization level switches, so it uses + default LoweringOptions (e.g. 
optimized TRANSPOSE lowering + is enabled by default, but an engineering -opt-transpose=false + option can still override this). + + Differential Revision: https://reviews.llvm.org/D130204 + +commit 4640a8a22cbae50c03c5f41d145916258a17a491 +Author: Peixin Qiao +Date: Wed Jul 27 23:37:55 2022 +0800 + + [NFC][flang] Add FIXME for privatization of loop bounds in Bridge.cpp + + There is post comment of adding TODO/FIXME for privatization of loop + bounds in D127137. D127137 fixes the bug in OpenMP firstprivate clause, + which should be refactored later according to the post comment. Add + FIXME for it. + + Differential Revision: https://reviews.llvm.org/D130625 + +commit 7ac2500eb05fde69235a4dfbc435dc9362fbcd83 +Author: Peixin Qiao +Date: Wed Jul 27 23:17:54 2022 +0800 + + [NFC][flang] Remove the unused header in Bridge.cpp + + The header file OpenMPDialect.h is added in Bridge.cpp in D130027, + but it is unused. Remove it. + + Differential Revision: https://reviews.llvm.org/D130625 + +commit f2b7f18e6375bb7eb6232f35c9ca30da8e20d7c7 +Author: Shraiysh Vaishay +Date: Tue Jul 26 19:18:27 2022 +0530 + + Revert "[flang][OpenMP] Lowering support for default clause" + + This reverts commit 05e6fce84fd39d150195b8928561f2c90c71e538. + +commit 05e6fce84fd39d150195b8928561f2c90c71e538 +Author: Nimish Mishra +Date: Tue Jul 26 14:08:34 2022 +0530 + + [flang][OpenMP] Lowering support for default clause + + This patch adds lowering support for default clause. + + 1. During symbol resolution in semantics, should the enclosing context have + a default data sharing clause defined and a `parser::Name` is not attached + to an explicit data sharing clause, the + `semantics::Symbol::Flag::OmpPrivate` flag (in case of `default(private)`) + and `semantics::Symbol::Flag::OmpFirstprivate` flag (in case of + `default(firstprivate)`) is added to the symbol. + + 2. 
During lowering, all symbols having either + `semantics::Symbol::Flag::OmpPrivate` or + `semantics::Symbol::Flag::OmpFirstprivate` flag are collected and + privatised appropriately. + + Co-authored-by: Peixin Qiao + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D123930 + +commit 3356d72a5ffa69324f8fdbc067c440cda8154797 +Author: Kazu Hirata +Date: Mon Jul 25 23:01:01 2022 -0700 + + [flang] Use value or * instead of getValue (NFC) + + This patch replaces x.getValue() with *x if the reference is obviously + protected by a presence check. Otherwise, it replaces x.getValue() + with x.value(). + +commit 17d9bdf4601983491d2b6f28e2b6b5a36f2688c7 +Author: Arnamoy Bhattacharyya +Date: Mon Jul 25 20:31:23 2022 -0400 + + [Flang][OpenMP] Add support for lastprivate clause for worksharing loop. + + This patch adds an initial support to the lastprivate clause for worksharing loop. The patch creates necessary control flow to guarantee the store of the value from the logical last iteration of the workshare loop. + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D130027 + +commit 7bb1151ba21e26d91ddaa83177bb58b4d1c36710 +Author: Kiran Chandramohan +Date: Mon Jul 25 18:21:17 2022 +0000 + + [Flang][OpenMP] Initial support for integer reduction in worksharing-loop + + Lower the Flang parse-tree containing OpenMP reductions to the OpenMP + dialect. The OpenMP dialect models reductions with, + 1) A reduction declaration operation that specifies how to initialize, combine, + and atomically combine private reduction variables. + 2) The OpenMP operation (like wsloop) that supports reductions has an array of + reduction accumulator variables (operands) and an array attribute of the same + size that points to the reduction declaration to be used for the reduction + accumulation. + 3) The OpenMP reduction operation that takes a value and an accumulator. 
+ This operation replaces the original reduction operation in the source. + + (1) is implemented by the `createReductionDecl` in OpenMP.cpp, + (2) is implemented while creating the OpenMP operation, + (3) is implemented by the `genOpenMPReduction` function in OpenMP.cpp, and + called from Bridge.cpp. The implementation of (3) is not very robust. + + NOTE 1: The patch currently supports only reductions for integer type addition. + NOTE 2: Only supports reduction in the worksharing loop. + NOTE 3: Does not generate atomic combination region. + NOTE 4: Other options for creating the reduction operation include + a) having the reduction operation as a construct containing an assignment + and then handling it appropriately in the Bridge. + b) we can modify `genAssignment` or `genFIR(AssignmentStmt)` in the Bridge to + handle OpenMP reduction but so far we have tried not to mix OpenMP + and non-OpenMP code and this will break that. + I will try (b) in a separate patch. + NOTE 5: OpenMP dialect gained support for reduction with the patches: + D105358, D107343. See https://discourse.llvm.org/t/rfc-openmp-reduction-support/3367 + for more details. + + Reviewed By: awarzynski + + Differential Revision: https://reviews.llvm.org/D130077 + + Co-authored-by: Peixin-Qiao + +commit d507e8b70e4668f891d5df03f966c154cc4d5370 +Author: Arnamoy Bhattacharyya +Date: Mon Jul 11 09:01:15 2022 -0400 + + [flang][OpenMP] Fix firstprivate bug + + In case where the bound(s) of a workshare loop use(s) firstprivate var(s), currently, that use is not updated with the created clone. It still uses the shared variable. This patch fixes that. + + Reviewed By: peixin + + Differential Revision: https://reviews.llvm.org/D127137 + +commit 53804e426d9b552adaa1adb86a2df9014c41d42a +Author: Valentin Clement +Date: Thu Jul 7 09:37:12 2022 +0200 + + [flang][NFC] Make LEN parameters homogenous + + This patch is part of the upstreaming effort from fir-dev branch. 
+ This is the last patch for the upstreaming effort. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D129187 + + Co-authored-by: Eric Schweitz + +commit 23c2bedfd93cfacc62009425c464e659a34e92e6 +Author: Peter Klausler +Date: Fri Jul 1 11:40:44 2022 -0700 + + [flang] Establish a single source of target information for semantics + + Create a TargetCharacteristics class to centralize the few items of + target specific information that are relevant to semantics. Use the + new class for all target queries, including derived type component layout + modeling. + + Future work will initialize this class with target information + provided or forwarded by the drivers, and use it to fold layout-dependent + intrinsic functions like TRANSFER(). + + Differential Revision: https://reviews.llvm.org/D129018 + + Updates: Attempts to work around build issues on Windows. + +commit 0dd4fb0408ed980de2c100905e68a4adf9987c61 +Author: Valentin Clement +Date: Fri Jul 1 10:36:45 2022 +0200 + + [flang] Fix for broken/degenerate forall case + + Fix for broken/degenerate forall case where there is no assignment to an + array under the explicit iteration space. While this is a multiple + assignment, semantics only raises a warning. + The fix is to add a test that the explicit space has any sort of array + to be updated, and if not then the do_loop nest will not require a + terminator to forward array values to the next iteration. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D128973 + + Co-authored-by: Eric Schweitz + +commit 39377d52273edb53a371f32a862df82f6b7f239d +Author: Valentin Clement +Date: Fri Jul 1 08:29:19 2022 +0200 + + [flang] Fix APFloat conversion cases + + This patch is part of the upstreaming effort from fir-dev branch. 
+ + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D128935 + + Co-authored-by: Eric Schweitz + Co-authored-by: Peter Steinfeld + +commit a19c2132122e78051bfc98c304afb528b7ca14ec +Author: Valentin Clement +Date: Thu Jun 30 16:56:29 2022 +0200 + + [flang][NFC] Fix warning + +commit 1e55ec6666fa687b1a86bdaa95ea814557855fd1 +Author: Valentin Clement +Date: Thu Jun 30 09:03:49 2022 +0200 + + [flang] SELECT CASE constructs with character selectors that require a temp + + Here is a character SELECT CASE construct that requires a temp to hold the + result of the TRIM intrinsic call: + + ``` + module m + character(len=6) :: s + contains + subroutine sc + n = 0 + if (lge(s,'00')) then + select case(trim(s)) + case('11') + n = 1 + case default + continue + case('22') + n = 2 + case('33') + n = 3 + case('44':'55','66':'77','88':) + n = 4 + end select + end if + print*, n + end subroutine + end module m + ``` + + This SELECT CASE construct is implemented as an IF/ELSE-IF/ELSE comparison + sequence. The temp must be retained until some comparison is successful. + At that point the temp may be freed. Generalize statement context processing + to allow multiple finalize calls to do this, such that the program always + executes exactly one freemem call. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: klausler, vdonaldson + + Differential Revision: https://reviews.llvm.org/D128852 + + Co-authored-by: V Donaldson + +commit 3b7c3a654c9175f41ac871a937cbcae73dfb3c5d +Author: Kazu Hirata +Date: Sat Jun 25 11:56:50 2022 -0700 + + Revert "Don't use Optional::hasValue (NFC)" + + This reverts commit aa8feeefd3ac6c78ee8f67bf033976fc7d68bc6d. 
+ +commit aa8feeefd3ac6c78ee8f67bf033976fc7d68bc6d +Author: Kazu Hirata +Date: Sat Jun 25 11:55:57 2022 -0700 + + Don't use Optional::hasValue (NFC) + +commit 27afb362b1e85dac21744b95ed9b48f7e9fd016c +Author: Peixin-Qiao +Date: Fri Jun 24 15:33:09 2022 +0800 + + [flang][OpenMP] Initial support the lowering of copyin clause + + This supports the lowering of copyin clause initially. The pointer, + allocatable, common block, polymorphic variables will be supported + later. + + This also includes the following changes: + + 1. Resolve the COPYIN clause and make the entity as host associated. + + 2. Fix collectSymbolSet by adding one option to control collecting the + symbol itself or ultimate symbol of it so that it can be used to + explicitly differentiate the host and associated variables in + host-association. + + 3. Add one helper function `lookupOneLevelUpSymbol` to differentiate the + usage of host and associated variables explicitly. The previous + lowering of firstprivate depends on the order of + `createHostAssociateVarClone` and `lookupSymbol` of host symbol. With + this fix, this dependence is removed. + + 4. Reuse `copyHostAssociateVar` for copying operation of COPYIN clause. + + Reviewed By: kiranchandramohan, NimishMishra + + Differential Revision: https://reviews.llvm.org/D127468 + +commit ab89c132b547951945788fc2a0969cf64b0df4cd +Author: Valentin Clement +Date: Thu Jun 23 14:57:24 2022 +0200 + + [flang] Add lowering TODO for separate module procedures + + MODULE FUNCTION and MODULE SUBROUTINE currently cause lowering crash: + "symbol is not mapped to any IR value" because special care is needed + to handle their interface. + + Add a TODO for now. + + Example of program that crashed and will hit the TODO: + + ``` + module mod + interface + module subroutine sub + end subroutine + end interface + contains + module subroutine sub + x = 42 + end subroutine + end module + ``` + + This patch is part of the upstreaming effort from fir-dev branch.
+ + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D128412 + + Co-authored-by: Jean Perier + +commit ed8fceaa09cd66324c6efc1070f962731a62e2dc +Author: Kazu Hirata +Date: Mon Jun 20 23:35:53 2022 -0700 + + Don't use Optional::getValue (NFC) + +commit 5413bf1bac2abb9e06901686cdc959e92940143a +Author: Kazu Hirata +Date: Mon Jun 20 11:33:56 2022 -0700 + + Don't use Optional::hasValue (NFC) + +commit 331145e6e979ddb115e3bfd44d282828994d4e23 +Author: Valentin Clement +Date: Mon Jun 20 15:43:44 2022 +0200 + + [flang][NFC] Unify todo messages + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D128186 + + Co-authored-by: Peter Steinfeld + +commit 84b9ae662419ce97b3cb13879be431f6a0c9eaa4 +Author: Mats Petersson +Date: Tue Jun 7 14:00:08 2022 +0100 + + [flang]Add support for do concurrent + + [flang]Add support for do concurrent + + Upstreaming from fir-dev on https://github.com/flang-compiler/f18-llvm-project + + Support for concurrent execution in do-loops. + + A selection of tests are also added. + + Co-authored-by: V Donaldson + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D127240 + +commit 494cd9b6fc7ff3e9d9bfad08cfd329763b799749 +Author: Andrzej Warzynski +Date: Fri Jun 10 15:26:13 2022 +0000 + + [flang][lowering] Ignore compiler directives + + This patch simply replaces a `TODO` with a warning. + + This is part of the upstreaming effort from the `fir-dev` branch in [1]. 
+ + [1] https://github.com/flang-compiler/f18-llvm-project + + Co-authored-by: Eric Schweitz + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D127415 + +commit 5b66cc1000f999e3fbf463a1a722249b8a6e51cb +Author: Valentin Clement +Date: Fri Jun 10 08:50:40 2022 +0200 + + [flang][NFC] Move Todo.h from Lower to Optimizer + + Remove a backwards dependence from Optimizer -> Lower by moving Todo.h + to the optimizer and out of lowering. + + This patch is part of the upstreaming effort from fir-dev branch. + + Co-authored-by: Eric Schweitz + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D127292 + +commit 7eecfc077f36fe249d5457e2d9a0e294cb25d615 +Author: Kiran Chandramohan +Date: Tue Jun 7 09:57:38 2022 +0000 + + [Flang] Add flag dependent code to execute the loop-body at least once + + Given the flag `--always-execute-loop-body` the compiler emits code + to execute the body of the loop at least once. + + Note: This is part of upstreaming from the fir-dev branch of + https://github.com/flang-compiler/f18-llvm-project. + + Reviewed By: awarzynski, schweitz + + Differential Revision: https://reviews.llvm.org/D127128 + + Co-authored-by: Jean Perier + Co-authored-by: Eric Schweitz + Co-authored-by: V Donaldson + Co-authored-by: Valentin Clement + Co-authored-by: Sameeran Joshi + +commit 411bd2d40788c8cb869dc4fdc37e01a57213cda9 +Author: Peixin-Qiao +Date: Tue Jun 7 15:08:17 2022 +0800 + + [flang][OpenMP] Support lowering parse-tree to MLIR for threadprivate directive + + This supports lowering parse-tree to MLIR for threadprivate directive + following the OpenMP 5.1 [2.21.2] standard.
Take the following as an + example: + + ``` + program m + integer, save :: i + !$omp threadprivate(i) + call sub(i) + !$omp parallel + call sub(i) + !$omp end parallel + end + ``` + ``` + func.func @_QQmain() { + %0 = fir.address_of(@_QFEi) : !fir.ref + %1 = omp.threadprivate %0 : !fir.ref -> !fir.ref + fir.call @_QPsub(%1) : (!fir.ref) -> () + omp.parallel { + %2 = omp.threadprivate %0 : !fir.ref -> !fir.ref + fir.call @_QPsub(%2) : (!fir.ref) -> () + omp.terminator + } + return + } + ``` + + A threadprivate operation (omp.threadprivate) is created for all + references to a threadprivate variable. The runtime will appropriately + return a threadprivate var (%1 as above) or its copy (%2 as above) + depending on whether it is outside or inside a parallel region. For + threadprivate access outside the parallel region, the threadprivate + operation is created in instantiateVar. Inside the parallel region, it + is created in createBodyOfOp. + + One new utility function collectSymbolSet is created for collecting + all the variables with a property within a evaluation, which may be one + Fortran, or OpenMP, or OpenACC construct. + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D124226 + +commit 8c349d707ec2677e6235b4e9e3efa1e0c4de11f2 +Author: Kiran Chandramohan +Date: Wed Jun 1 11:48:20 2022 +0000 + + [Flang] Lower the infinite do loop + + The basic infinite loop is lowered to a branch to the body of the + loop, and the body containing a back edge as its terminator. + + Note: This is part of upstreaming from the fir-dev branch of + https://github.com/flang-compiler/f18-llvm-project. 
+ + Reviewed By: rovka + + Differential Revision: https://reviews.llvm.org/D126697 + + Co-authored-by: Eric Schweitz + Co-authored-by: V Donaldson + +commit a159128811ae1d0a781dbc8590fa1e0f26642f82 +Author: Diana Picus +Date: Tue May 31 10:55:56 2022 +0000 + + [flang] Upstream lowering of real control loops + + Upstream the code for handling loops with real control variables from + the fir-dev branch at + https://github.com/flang-compiler/f18-llvm-project/tree/fir-dev/ + + Also add a test. + + Loops with real-valued control variables are always lowered to + unstructured loops. The real-valued control variables are handled the + same as integer ones, the only difference is that they need to use + floating point instructions instead of the integer equivalents. + + Co-authored-by: V Donaldson + +commit 11fb1aa5a40885188b014b3ccd326cc92e4a3b9e +Author: Diana Picus +Date: Wed May 25 12:51:10 2022 +0000 + + [flang] Upstream the lowering of the while loop + + Upstream the code for handling while loops from the fir-dev branch at + https://github.com/flang-compiler/f18-llvm-project/tree/fir-dev/ + + Also add tests. + + The while loop is lowered to a header block that checks the loop + condition and branches either to the exit block or to the body of the + loop. The body of the loop will unconditionally branch back to the + header. + + Differential Revision: https://reviews.llvm.org/D126636 + + Co-authored-by: Eric Schweitz + Co-authored-by: V Donaldson + +commit 1e1f60c605a9b1c803f3bbb1a1339c9bb1af4e34 +Author: V Donaldson +Date: Tue May 24 10:06:24 2022 -0700 + + [flang] Alternate entry points with unused arguments + + A dummy argument in an entry point of a subprogram with multiple + entry points need not be defined in other entry points. It is only + legal to reference such an argument when calling an entry point that + does have a definition. An entry point without such a definition + needs a local "substitute" definition sufficient to generate code. 
+ It is nonconformant to reference such a definition at runtime. + Most such definitions and associated code will be deleted as dead + code at compile time. However, that is not always possible, as in + the following code. This code is conformant if all calls to entry + point ss set m=3, and all calls to entry point ee set n=3. + + subroutine ss(a, b, m, d, k) ! no x, y, n + integer :: a(m), b(a(m)), m, d(k) + integer :: x(n), y(x(n)), n + integer :: k + 1 print*, m, k + print*, a + print*, b + print*, d + if (m == 3) return + entry ee(x, y, n, d, k) ! no a, b, m + print*, n, k + print*, x + print*, y + print*, d + if (n /= 3) goto 1 + end + + integer :: xx(3), yy(5), zz(3) + xx = 5 + yy = 7 + zz = 9 + call ss(xx, yy, 3, zz, 3) + call ss(xx, yy, 3, zz, 3) + end + + Lowering currently generates fir::UndefOp's for all unused arguments. + This is usually ok, but cases such as the one here incorrectly access + unused UndefOp arguments for m and n from an entry point that doesn't + have a proper definition. + + The problem is addressed by creating a more complete definition of an + unused argument in most cases. This is implemented in large part by + moving the definition of an unused argument from mapDummiesAndResults + to mapSymbolAttributes. The code in mapSymbolAttributes then chooses + one of three code generation options, depending on information + available there. + + This patch deals with dummy procedures in alternate entries, and adds + a TODO for procedure pointers (the PFTBuilder is modified to analyze + procedure pointer symbol so that they are not silently ignored, and + instead hits proper TODOs). + + BoxAnalyzer is also changed because assumed-sized arrays were wrongfully + categorized as constant shape arrays. This had no impact, except when + there were unused entry points. 
+ + Co-authored-by: jeanPerier + + Differential Revision: https://reviews.llvm.org/D125867 + +commit 3b390a1682232a0d6921692f72fac65ec4374597 +Author: Mats Petersson +Date: Wed Jul 7 16:58:32 2021 +0100 + + [flang][OpenMP] Support for Collapse + + Convert Fortran parse-tree into MLIR for collapse-clause. + + Includes simple Fortran to LLVM-IR test, with auto-generated + check-lines (some of which have been edited by hand). + + Reviewed By: kiranchandramohan, shraiysh, peixin + + Differential Revision: https://reviews.llvm.org/D125302 + +commit 1bffc75383a2285e69deda90cd10860769485234 +Author: Eric Schweitz +Date: Fri Apr 22 13:59:17 2022 -0700 + + Upstream support for POINTER assignment in FORALL. + + Reviewed By: vdonaldson, PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D125140 + +commit b85c39dd007858aac3edd915d802ff191bd58fe3 +Author: Kiran Chandramohan +Date: Fri May 6 11:45:18 2022 +0000 + + [Flang][OpenMP] Initial lowering of the OpenMP worksharing loop + + The OpenMP worksharing loop operation in the dialect is a proper loop + operation and not a container of a loop. So we have to lower the + parse-tree OpenMP loop construct and the do-loop inside the construct + to a omp.wsloop operation and there should not be a fir.do_loop inside + it. This is achieved by skipping fir.do_loop creation and calling genFIR + for the nested evaluations in the lowering of the do construct. + + Note: Handling of more clauses, parallel do, storage of loop index variable etc will come in separate patches. + + Part of the upstreaming effort to move LLVM Flang from fir-dev branch of + https://github.com/flang-compiler/f18-llvm-project to the LLVM Project. 
+ + Reviewed By: peixin + + Differential Revision: https://reviews.llvm.org/D125024 + + Co-authored-by: Sourabh Singh Tomar + Co-authored-by: Shraiysh Vaishay + +commit aa0e167fab88cbef4d3f301534c86d143f2274ec +Author: Kiran Chandramohan +Date: Fri May 6 09:09:01 2022 +0000 + + [Flang] Lower Unstructured do loops + + The FIR `do_loop` is designed as a structured operation with a single + block inside it. Presence of unstructured constructs like jumps, exits + inside the loop will cause the loop to be marked as unstructured. These + loops are lowered using the `control-flow` dialect branch operations. + + Fortran semantics do not allow the loop variable to be modified inside + the loop. To prevent accidental modification, the iteration of the + loop is modeled by two variables, trip-count and loop-variable. + -> The trip-count and loop-variable are initialized in the pre-header. + The trip-count is set as (end-start+step)/step where end, start and + step have the usual meanings. The loop-variable is initialized to start. + -> The header block contains a conditional branch instruction which + selects between branching to the body of the loop or the exit block + depending on the value of the trip-count. + -> Inside the body, the trip-count is decremented and the loop-variable + incremented by the step value. Finally it branches to the header of the + loop. + + Part of the upstreaming effort to move LLVM Flang from fir-dev branch of + https://github.com/flang-compiler/f18-llvm-project to the LLVM Project. 
+ + Reviewed By: awarzynski + + Differential Revision: https://reviews.llvm.org/D124837 + + Co-authored-by: Val Donaldson + Co-authored-by: Eric Schweitz + Co-authored-by: Jean Perier + Co-authored-by: Peter Klausler + +commit 2c8cb9acb51e2fa74bf9339ddd0884ef9d921dfc +Author: Jean Perier +Date: Fri Apr 29 14:52:27 2022 +0200 + + [flang] Handle common block with different sizes in same file + + Semantics is not preventing a named common block to appear with + different size in a same file (named common block should always have + the same storage size (see Fortran 2018 8.10.2.5), but it is a common + extension to accept different sizes). + + Lowering was not coping with this well, since it just used the first + common block appearance, starting with BLOCK DATAs to define common + blocks (this also was an issue with the blank common block, which can + legally appear with different size in different scoping units). + + Semantics is also not preventing named common from being initialized + outside of a BLOCK DATA, and lowering was dealing badly with this, + since it only gave an initial value to common blocks Globals if the + first common block appearance, starting with BLOCK DATAs had an initial + value. + + Semantics is also allowing blank common to be initialized, while + lowering was assuming this would never happen, and was never creating + an initial value for it. + + Lastly, semantics was not complaining if a COMMON block was initialized + in several scoping units in a same file, while lowering can only generate + one of these initial values. + + To fix this, add a structure to keep track of COMMON block properties + (biggest size, and initial value if any) at the Program level. Once the + size of a common block appearance is known, the common block appearance + is checked against this information.
It allows semantics to emit an error + in case of multiple initialization in different scopes of a same common + block, and to warn in case named common blocks appear with different + sizes. Lastly, this allows lowering to use the Program level info about + common blocks to emit the right GlobalOp for a Common Block, regardless + of the COMMON Block appearances order: It emits a GlobalOp with the + biggest size, whose lowest bytes are initialized with the initial value + if any is given in a scope where the common block appears. + + Lowering is updated to emit the common blocks before anything else so + that the related GlobalOps are available when lowering the scopes where + common blocks appear. It is also updated to not assume that blank common + are never initialized. + + Differential Revision: https://reviews.llvm.org/D124622 + +commit b5b3e50f65ee99257041723e7645d44c1aeb1117 +Author: Kiran Chandramohan +Date: Thu Apr 28 12:20:11 2022 +0000 + + [Flang] Initial lowering of the Fortran Do loop + + This patch adds code to lower simple Fortran Do loops with loop control. + Lowering is performed by the `genFIR` function when called with a + `Fortran::parser::DoConstruct`. `genFIR` function calls `genFIRIncrementLoopBegin` + then calls functions to lower the body of the loop and finally calls + the function `genFIRIncrementLoopEnd`. `genFIRIncrementLoopBegin` is + responsible for creating the FIR `do_loop` as well as storing the value of + the loop index to the loop variable. `genFIRIncrementLoopEnd` returns + the incremented value of the loop index and also stores the index value + outside the loop. This is important since the loop variable can be used + outside the loop. Information about a loop is collected in a structure + `IncrementLoopInfo`.
+ + Note 1: Future patches will bring in lowering for unstructured, + infinite, while loops + Note 2: This patch is part of upstreaming code from the fir-dev branch of + https://github.com/flang-compiler/f18-llvm-project. + + Reviewed By: awarzynski + + Differential Revision: https://reviews.llvm.org/D124277 + + Co-authored-by: Eric Schweitz + Co-authored-by: Jean Perier + Co-authored-by: Val Donaldson + Co-authored-by: Peter Klausler + Co-authored-by: Valentin Clement + +commit b6b8d34554a4d85ec064463b54a27e073c42beeb +Author: Peixin-Qiao +Date: Thu Apr 28 09:40:30 2022 +0800 + + [flang] Add lowering stubs for OpenMP/OpenACC declarative constructs + + This patch provides the basic infrastructure for lowering declarative + constructs for OpenMP and OpenACC. + + This is part of the upstreaming effort from the fir-dev branch in [1]. + [1] https://github.com/flang-compiler/f18-llvm-project + + Reviewed By: kiranchandramohan, shraiysh, clementval + + Differential Revision: https://reviews.llvm.org/D124225 + +commit acd75440c67acc31d9a5b0afdd64b5700be8960e +Author: Kiran Chandramohan +Date: Wed Apr 27 12:19:54 2022 +0000 + + [Flang] Lower the FailImage Statement + + Lowering of FailImage statement generates a runtime call and the + unreachable operation. The unreachable operation cannot terminate + a structured operation like the IF operation, hence mark as + unstructured. + + Note: This patch is part of upstreaming code from the fir-dev branch of + https://github.com/flang-compiler/f18-llvm-project. + + Reviewed By: clementval + + Differential Revision: https://reviews.llvm.org/D124520 + + Co-authored-by: Eric Schweitz + +commit 58ceae9561fbae75fa175244db4e0e4bfe7416fd +Author: River Riddle +Date: Mon Apr 18 11:53:47 2022 -0700 + + [mlir:NFC] Remove the forward declaration of FuncOp in the mlir namespace + + FuncOp has been moved to the `func` namespace for a little over a month, the + using directive can be dropped now. 
+ +commit 07e16a2aae68a02629cbcb34a0c0b12cb84754f8 +Author: Kiran Chandramohan +Date: Mon Apr 11 09:05:00 2022 +0000 + + [Flang][OpenMP] Add implementation of privatisation + + Privatisation creates local copies of variables in the OpenMP region. + Two functions `createHostAssociateVarClone` and `copyHostAssociateVar` + are added to create a clone of the variable for basic privatisation and to + copy the contents for first-privatisation. + + Note: Tests for more data-types will be added when the fir.do_loop is + upstreamed. + + This is part of the upstreaming effort from the fir-dev branch in [1]. + [1] https://github.com/flang-compiler/f18-llvm-project + + Reviewed By: peixin, NimishMishra + + Differential Revision: https://reviews.llvm.org/D122595 + + Co-authored-by: Jean Perier + Co-authored-by: Eric Schweitz + Co-authored-by: Peter Klausler + Co-authored-by: Valentin Clement + Co-authored-by: Sourabh Singh Tomar + Co-authored-by: Nimish Mishra + Co-authored-by: Peixin-Qiao + +commit 534b228313409a781060856d6cb6c2e9523cba5b +Author: Valentin Clement +Date: Mon Mar 28 13:36:10 2022 +0200 + + [flang] Lower some coarray statements to their runtime functions + + This patch adds the lowering of coarray statements to the runtime + functions. The runtime functions are currently not implemented. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D122466 + +commit 12d22cea73534ecf5d2d8c09181807e883523ac3 +Author: Valentin Clement +Date: Thu Mar 24 15:00:52 2022 +0100 + + [flang][OpenACC] Lower enter data directive + + This patch adds lowering for the `!$acc enter data` directive + from the PFT to OpenACC dialect. + + This patch is part of the upstreaming effort from fir-dev branch. 
+ + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D122384 + +commit 74f992929ecf660bb299e5767a1fba15e9181613 +Author: Valentin Clement +Date: Tue Mar 22 16:11:42 2022 +0100 + + [flang][NFC] Remove unused variable + + Fix for buildbot failure shown after fe252f8ed6369acdb13d4e290d3b9dfe2ec4eb8e + +commit fe252f8ed6369acdb13d4e290d3b9dfe2ec4eb8e +Author: Valentin Clement +Date: Tue Mar 22 15:40:32 2022 +0100 + + [flang] Lower boxed procedure + + In FIR, we want to wrap function pointers in a special box known as a + boxproc value. Fortran has a limited form of dynamic scoping + [https://tinyurl.com/2p8v2hw7] between "host procedures" and "internal + procedures". There are a number of implementations possible. + + Boxproc typed values abstract away the implementation details of when a + function pointer can be passed directly (as a raw address) and when a + function pointer has to account for the presence of a dynamic scope. + When lowering Fortran syntax to FIR, all function pointers are emboxed + as boxproc values. + + When creating LLVM IR, we must strip away the abstraction and produce + low-level LLVM "assembly" code. This patch implements that + transformation as converting the boxproc values to either raw function + pointers or executable trampolines on the stack as needed. The + trampoline then captures the dynamic scope context within an executable + thunk that can be passed instead of the function's raw address. + + Some extra handling is required for Fortran functions that return a + character value to deal with LEN values here. + + Some of the code in Bridge.cpp and ConvertExpr.cpp and be re-arranged to + faciliate the upstreaming effort. + + This patch is part of the upstreaming effort from fir-dev branch. 
+ + Reviewed By: jeanPerier, PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D122223 + + Co-authored-by: mleair + Co-authored-by: Jean Perier + Co-authored-by: Eric Schweitz + Co-authored-by: V Donaldson + Co-authored-by: Kiran Chandramohan + +commit 308fc3f27797ce2b0dc01970d6fe2c6c9e1f55c7 +Author: Valentin Clement +Date: Fri Mar 18 15:39:57 2022 +0100 + + [flang] Lower select case statement + + This patch adds lowering for the `select case` + statement. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D122007 + + Co-authored-by: Jean Perier + Co-authored-by: Eric Schweitz + Co-authored-by: V Donaldson + +commit fb99266401960afd99845890720b4d9b3ecd63d2 +Author: Eric Schweitz +Date: Wed Mar 16 15:23:25 2022 -0700 + + [flang] Remove unused code and redundant assertion. + + Differential Revision: https://reviews.llvm.org/D121864 + +commit 9aeb7f035bdde83501e5eddd9e6ad175b8ed697f +Author: Valentin Clement +Date: Wed Mar 16 17:10:31 2022 +0100 + + [flang] Lower IO input with vector subscripts + + This patch adds lowering for IO input with vector subscripts. + It defines a VectorSubscriptBox class that allow representing and working + with a lowered Designator containing vector subscripts while ensuring + all the subscripts expression are only lowered once. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D121806 + + Co-authored-by: Jean Perier + Co-authored-by: Eric Schweitz + +commit 94a11063573b4e1a3405d4a73d9928083115a6c1 +Author: Valentin Clement +Date: Tue Mar 15 22:18:45 2022 +0100 + + [flang] Lower min|max intrinsics + + This patch adds lowering for the following intrinsics: + - `max` + - `maxloc` + - `maxval` + - `minloc` + - `minval` + + This patch is part of the upstreaming effort from fir-dev branch. 
+ + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D121701 + + Co-authored-by: Jean Perier + Co-authored-by: Eric Schweitz + Co-authored-by: mleair + +commit 8b5035333518c0363e4779dc1df855f06d3499ba +Author: Valentin Clement +Date: Tue Mar 15 22:03:14 2022 +0100 + + [flang] Lower alternate return + + This patch adds the lowering infrastructure for the lowering of + alternat returns. + + This patch is part of the upstreaming effort from fir-dev branch. + + Depends on D121698 + + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D121699 + + Co-authored-by: V Donaldson + Co-authored-by: Jean Perier + Co-authored-by: Eric Schweitz + +commit 76134f4138fcd1ef4fec989db8c050e793ee187d +Author: Valentin Clement +Date: Tue Mar 15 22:01:34 2022 +0100 + + [flang] Lower entry statement + + This patch add the lowering for the entry statement. + + This patch is part of the upstreaming effort from fir-dev branch. + + Depends on D121697 + + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D121698 + + Co-authored-by: V Donaldson + Co-authored-by: Jean Perier + Co-authored-by: Eric Schweitz + +commit a1425019e7207e8dc53e627aacfd547415a10b35 +Author: Valentin Clement +Date: Tue Mar 15 21:57:30 2022 +0100 + + [flang] Lower more pointer assignments/disassociation cases + + This patch lowers more cases of pointer assignments and + disassociations. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: PeteSteinfeld, schweitz + + Differential Revision: https://reviews.llvm.org/D121697 + + Co-authored-by: V Donaldson + Co-authored-by: Jean Perier + Co-authored-by: mleair + Co-authored-by: Eric Schweitz + +commit d8222d91c6f41725aa7669bea24932e072bc2767 +Author: Valentin Clement +Date: Mon Mar 14 18:15:16 2022 +0100 + + [flang] Lower format statement + + This patch lowers the format statement. 
+ + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D121611 + +commit 72276bdaff931910f62a84336b3e864ab48bac06 +Author: Valentin Clement +Date: Thu Mar 10 20:19:57 2022 +0100 + + [flang] Lower pointer component in derived type + + This patch lowers pointer component part of derived types to + FIR. + + This patch is part of the upstreaming effort from fir-dev branch. + + Depends on D121383 + + Reviewed By: PeteSteinfeld, schweitz + + Differential Revision: https://reviews.llvm.org/D121384 + + Co-authored-by: V Donaldson + Co-authored-by: Jean Perier + Co-authored-by: Eric Schweitz + +commit 88ae0d61c31674bd75144c246ae25b55ecc5bff9 +Author: Valentin Clement +Date: Thu Mar 10 19:43:11 2022 +0100 + + [flang] Lower general forall statement + + This patch lowers general forall statements. The forall + are lowered to nested loops. + + This patch is part of the upstreaming effort from fir-dev branch. + + Depends on D121385 + + Reviewed By: PeteSteinfeld, schweitz + + Differential Revision: https://reviews.llvm.org/D121386 + + Co-authored-by: V Donaldson + Co-authored-by: Jean Perier + Co-authored-by: Eric Schweitz + +commit 7a6a1655d83b3ff79d120e399d8b9cc7ad2b143c +Author: Valentin Clement +Date: Thu Mar 10 18:43:40 2022 +0100 + + [flang] Lower where statement + + This patch lowers where statement to FIR. + The where statement is lowered to a conbination of + loops and if conditions. + + This patch is part of the upstreaming effort from fir-dev branch. 
+ + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D121385 + + Co-authored-by: Jean Perier + Co-authored-by: Eric Schweitz + +commit e0f549a43ae02e65fce6c9c7a567fe3dc27bec9b +Author: Shraiysh Vaishay +Date: Thu Mar 10 22:40:23 2022 +0530 + + [flang] Added basic connect to lower OpenMP constructs + + Reviewed By: clementval + + Differential Revision: https://reviews.llvm.org/D121382 + +commit 589d51ea9f1a469cef2aae306859afaf6d7d5885 +Author: Valentin Clement +Date: Thu Mar 10 18:06:20 2022 +0100 + + [flang] Lower basic derived types + + This patch lowers basic derived type to FIR. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D121383 + + Co-authored-by: V Donaldson + Co-authored-by: Jean Perier + Co-authored-by: Eric Schweitz + +commit 092601d4baab7c13c06b31eda2d5bed91d9a6b65 +Author: Andrzej Warzynski +Date: Thu Mar 3 13:25:09 2022 +0000 + + [flang] Remove 'using namespace mlir;` from header files + + Currently, CGOps.h and FIROps.h contain `using namespace mlir;`. Every + file that includes one of these header files (directly and transitively) + will have the MLIR namespace enabled. With name-clashes within + sub-projects (LLVM and MLIR, MLIR and Flang), this is not desired. Also, + it is not possible to "un-use" a namespace once it is "used". Instead, + we should try to limit `using namespace` to implementation files (i.e. + *.cpp). + + This patch removes `using namespace mlir;` from header files and adjusts + other files accordingly. In header and TableGen files, extra namespace + qualifier is added when referring to symbols defined in MLIR. Similar + approach is adopted in source files that didn't require many changes. In + files that would require a lot of changes, `using namespace mlir;` is + added instead. 
+ + Differential Revision: https://reviews.llvm.org/D120897 + +commit a49bf0ac381c456cdc3dcdf36378a361634f1123 +Author: Valentin Clement +Date: Tue Mar 8 22:08:02 2022 +0100 + + [flang] Lower associate construct + + This patch lowers the `associate` construct. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D121239 + + Co-authored-by: V Donaldson + Co-authored-by: Jean Perier + Co-authored-by: Eric Schweitz + +commit 78a127a3ef066451d1a77f452937cecfe25da64b +Author: Valentin Clement +Date: Tue Mar 8 20:17:48 2022 +0100 + + [flang] Lower computed and assigned goto + + This patch lowers the computed and assigned goto statements. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: PeteSteinfeld, schweitz + + Differential Revision: https://reviews.llvm.org/D121219 + + Co-authored-by: V Donaldson + Co-authored-by: Jean Perier + +commit b3eb0e113e5f12f4fc88bda8bf5a653b00425f2b +Author: Valentin Clement +Date: Tue Mar 8 18:47:28 2022 +0100 + + [flang] Lower sum intrinsic + + This patch enables the lowering of the `sum` intrinsic. It adds + also infrastructure to deal with optional arguments in intrinsics and + implied loops. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D121221 + + Co-authored-by: Eric Schweitz + Co-authored-by: Jean Perier + Co-authored-by: mleair + +commit c5cf1b903409e491d7599809dc18187363d7be21 +Author: Valentin Clement +Date: Mon Mar 7 21:22:28 2022 +0100 + + [flang] Lower allocate and deallocate statements + + This patch add the lowering for the allocate + and the deallocate statements. + + This patch is part of the upstreaming effort from fir-dev branch. 
+ + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D121146 + + Co-authored-by: Eric Schweitz + Co-authored-by: Jean Perier + Co-authored-by: V Donaldson + +commit 764f95a8c78c33296aaee4a9ae8f1fba341a595b +Author: Valentin Clement +Date: Mon Mar 7 19:55:48 2022 +0100 + + [flang] Add lowering for host association + + This patches adds the code to handle host association for + inner subroutines and functions. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D121134 + + Co-authored-by: Eric Schweitz + Co-authored-by: Jean Perier + Co-authored-by: V Donaldson + +commit 17d71347b2ede797346b0a499665cf51e593b150 +Author: Valentin Clement +Date: Wed Mar 2 18:26:13 2022 +0100 + + [flang] Handle module in lowering pass + + This patch enables the lowering of basic modules and functions/subroutines + in modules. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D120819 + + Co-authored-by: Eric Schweitz + Co-authored-by: Jean Perier + +commit 7e32cada0105ec8756ce09a9fc07e2b10803d620 +Author: Valentin Clement +Date: Wed Mar 2 18:02:41 2022 +0100 + + [flang] Lower inquire statement + + This patch adds the lowering of the `inquire` statement. + + This patch is part of the upstreaming effort from fir-dev branch. + + Depends on D120822 + + Reviewed By: schweitz + + Differential Revision: https://reviews.llvm.org/D120823 + + Co-authored-by: Jean Perier + +commit 46f46a3763c494054227b2b0f551fc34fa9af367 +Author: Valentin Clement +Date: Wed Mar 2 17:58:38 2022 +0100 + + [flang] Lower basic IO file statements + + This patches adds lowering for couple of basic io statements such as `flush`, + `endfile`, `backspace` and `rewind` + + This patch is part of the upstreaming effort from fir-dev branch. 
+ + Depends on D120821 + + Reviewed By: schweitz + + Differential Revision: https://reviews.llvm.org/D120822 + + Co-authored-by: Eric Schweitz + Co-authored-by: Jean Perier + +commit db48f7b2f7b3a3a62822bd41de84b9ccb6093b5f +Author: Valentin Clement +Date: Wed Mar 2 17:55:10 2022 +0100 + + [flang] Lower IO open and close statements + + This patch adds the lowering of open and close statements + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: schweitz + + Differential Revision: https://reviews.llvm.org/D120821 + + Co-authored-by: Jean Perier + +commit d88dfd2b311d5f7f8ab9faa0edfd380c1fd2d2b2 +Author: Valentin Clement +Date: Tue Mar 1 22:28:16 2022 +0100 + + [flang] Handle dynamic array lowering + + This patch enables dynamic array lowering + and use the funcationality inside some IO tests. + + This patch is part of the upstreaming effort from fir-dev branch. + + Depends on D120743 + + Reviewed By: PeteSteinfeld, schweitz + + Differential Revision: https://reviews.llvm.org/D120744 + + Co-authored-by: Eric Schweitz + Co-authored-by: Jean Perier + Co-authored-by: V Donaldson + +commit 8c22cb846f31c42ce1d19370025ad05c4db56256 +Author: Valentin Clement +Date: Tue Mar 1 21:47:40 2022 +0100 + + [flang] Lower basic IO statement + + This patch enables the lowering of the print, read and write + IO statements. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: PeteSteinfeld, schweitz + + Differential Revision: https://reviews.llvm.org/D120743 + + Co-authored-by: Eric Schweitz + Co-authored-by: Jean Perier + Co-authored-by: V Donaldson + Co-authored-by: Kiran Chandramohan + +commit 23aa5a744666b281af807b1f598f517bf0d597cb +Author: River Riddle +Date: Sat Feb 26 14:49:54 2022 -0800 + + [mlir] Rename the Standard dialect to the Func dialect + + The last remaining operations in the standard dialect all revolve around + FuncOp/function related constructs. 
This patch simply handles the initial + renaming (which by itself is already huge), but there are a large number + of cleanups unlocked/necessary afterwards: + + * Removing a bunch of unnecessary dependencies on Func + * Cleaning up the From/ToStandard conversion passes + * Preparing for the move of FuncOp to the Func dialect + + See the discussion at https://discourse.llvm.org/t/standard-dialect-the-final-chapter/6061 + + Differential Revision: https://reviews.llvm.org/D120624 + +commit 37e84d9be06d52abeb387acdfb6dacb274c63da5 +Author: Valentin Clement +Date: Fri Feb 25 18:21:44 2022 +0100 + + [flang] Lower simple character return + + Handles function with character return. + + Character scalar results are passed as arguments in lowering so + that an assumed length character function callee can access the result + length. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: PeteSteinfeld, schweitz + + Differential Revision: https://reviews.llvm.org/D120558 + + Co-authored-by: Eric Schweitz + Co-authored-by: Jean Perier + Co-authored-by: V Donaldson + +commit f9704f0cfb7a9edb86c0755bafef54cbd365743d +Author: Valentin Clement +Date: Thu Feb 24 21:09:40 2022 +0100 + + [flang] Simple array assignment lowering + + This patch handles lowering of simple array assignment. + + ``` + a(:) = 10 + ``` + + or + + ``` + a(1) = 1 + ``` + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: PeteSteinfeld, schweitz + + Differential Revision: https://reviews.llvm.org/D120501 + + Co-authored-by: Jean Perier + Co-authored-by: V Donaldson + Co-authored-by: Eric Schweitz + +commit 2a59ead118065012446bdbd0a31dc52799212f87 +Author: Valentin Clement +Date: Thu Feb 24 18:11:41 2022 +0100 + + [flang] Lower allocatable assignment for scalar + + Add lowering for simple assignement on allocatable + scalars. + + This patch is part of the upstreaming effort from fir-dev branch. 
+ + Depends on D120483 + + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D120488 + + Co-authored-by: Eric Schweitz + Co-authored-by: Jean Perier + +commit d0b70a070aedc3665e352d06c7d996a4050f8fc8 +Author: Valentin Clement +Date: Wed Feb 23 19:48:07 2022 +0100 + + [flang] Lower function and subroutine calls + + This patch introduce basic function/subroutine calls. + Because of the state of lowering only simple scalar arguments + can be used in the calls. This will be enhanced in follow up + patches with arrays, allocatable, pointer ans so on. + + ``` + subroutine sub1() + end + + subroutine sub2() + call sub1() + end + ``` + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: schweitz + + Differential Revision: https://reviews.llvm.org/D120419 + + Co-authored-by: Eric Schweitz + Co-authored-by: Jean Perier + Co-authored-by: V Donaldson + +commit e641c29f41971597dbe190f98784f0e4cfc220cc +Author: Valentin Clement +Date: Thu Feb 17 18:23:22 2022 +0100 + + [flang] Lower simple scalar assignment + + This patch hanlde lowering of simple scalar assignment. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D120058 + + Co-authored-by: Jean Perier + +commit da7c77b82c217592cc14f5b5a3c6a9e6741896af +Author: Valentin Clement +Date: Wed Feb 16 20:27:23 2022 +0100 + + [flang] Handle lowering arguments in subroutine and function + + This patch adds infrsatrcutrue to be able to lower + arguments in functions and subroutines. + + This patch is part of the upstreaming effort from fir-dev branch. 
+ + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D119957 + + Co-authored-by: Eric Schweitz + Co-authored-by: Jean Perier + +commit ad40cc14a8b728dedc20c9397489bda50185b176 +Author: Valentin Clement +Date: Mon Feb 14 21:31:46 2022 +0100 + + [flang] Lower basic function with scalar integer/logical return value + + This patch allows the lowring of simple empty function with a + scalar integer or logical return value. + The code in ConvertType.cpp is cleaned up as well. This file was landed + together with the initial flang push and lowering was still a prototype + at that time. Some more cleaning will come with follow up patches. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D119698 + + Co-authored-by: Jean Perier + +commit 85b89ed213c41a8d7dafff957c8d20a247e6d9df +Author: Valentin Clement +Date: Thu Feb 10 18:35:16 2022 +0100 + + [flang] Lower simple RETURN statement + + This patch adds the lowering for the RETURN statement + without alternate returns in the main program or in subroutine + and functions. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D119429 + + Co-authored-by: V Donaldson + Co-authored-by: Eric Schweitz + Co-authored-by: Jean Perier + +commit ae37bb9804c7b6ee7e6d1c070889c30f74be1001 +Author: Kiran Chandramohan +Date: Tue Feb 8 23:01:39 2022 +0000 + + [Flang] Add support for lowering the goto statement + + This patch adds support for lowering the Fortran goto statement from + parse-tree to MLIR. The goto statement in Fortran is a form of + unstructured control flow. The statement transfers control to the + code starting at the label specified in the statement. This can be + faithfully represented in MLIR by a branch instruction. 
+ + To assist the lowering of code with unstructured control flow, blocks + are created in advance and associated with the relevant pre-fir tree + evaluations. + + This is part of the upstreaming effort from the fir-dev branch in [1]. + + [1] https://github.com/flang-compiler/f18-llvm-project + + Reviewed By: clementval, vdonaldson, schweitz, awarzynski + + Differential Revision: https://reviews.llvm.org/D118983 + + Co-authored-by: V Donaldson + Co-authored-by: Jean Perier + Co-authored-by: Eric Schweitz + +commit 2c2e5a5d0f191027ab447899599baeaf744fc2eb +Author: Valentin Clement +Date: Mon Feb 7 09:12:17 2022 +0100 + + [flang] Basic local variable lowering + + This patch add lowering for simple local variable. + + - The signatures in `ConvertType.h` have been simplified to take advantage of the `AbstractConverter`. + - The lowering make use of the `allocateLocal` from the `FirOpBuilder`. + + This lowering is used in patch D118982 + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: kiranchandramohan, jeanPerier, schweitz + + Differential Revision: https://reviews.llvm.org/D118978 + +commit dc6a344637a6417aedf85e5fd1236e7fcd43c3f1 +Author: Valentin Clement +Date: Thu Feb 3 10:40:19 2022 +0100 + + [flang] Add lowering for integer constant + + This patch enables the lowering of integer constant. + + The `ScalarExprLowering` class is introduced in `ConvertExpr.cpp` to help + the lowering of expression. This patch adds all the placeholder as well for future + expression lowering with the appropriate TODOs. + + Integer constant expression are lowered to `arith.constant` with an integer type corresponding to the kind value. + + This patch is in support of D118787 + + This patch is part of the upstreaming effort from fir-dev branch. 
+ + Reviewed By: schweitz, kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D118786 + +commit 415267407db27a6ca9818f1d269314fe92dd8042 +Author: Valentin Clement +Date: Wed Feb 2 18:44:09 2022 +0100 + + [flang] Switch return to ExtendedValue in AbstractConverter and Bridge + + Change the signature of `genExprAddr`, `genExprValue` to return a `fir::ExtendedValue` instead of a simple `mlir::Value` + + This patch is a preparation for more lowering to be upstream. It supports D118786 and D118787. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D118785 + +commit db01b123d012df2f0e6acf7e90bf4ba63382587c +Author: Valentin Clement +Date: Wed Feb 2 08:15:26 2022 +0100 + + [flang] Lower PAUSE statement + + Lower the PAUSE statement to a runtime call. + + This patch is part of the upstreaming effort from fir-dev branch. + + Reviewed By: kiranchandramohan, schweitz + + Differential Revision: https://reviews.llvm.org/D118699 + + Co-authored-by: Eric Schweitz + Co-authored-by: Jean Perier + +commit aab4263ad66c99da15cb9bda6aec8269d159b113 +Author: Valentin Clement +Date: Tue Feb 1 20:53:00 2022 +0100 + + [flang] Lower basic STOP statement + + This patch lowers STOP statement without arguments + and ERROR STOP. STOP statement with arguments lowering will + come in later patches ince it requires some expression lowering + to be added. + STOP statement is lowered to a runtime call. + + Also makes sure we are creating a constant in the MLIR arith constant. + + This patch is part of the upstreaming effort from fir-dev branch. 
+ + Reviewed By: kiranchandramohan, schweitz + + Differential Revision: https://reviews.llvm.org/D118697 + + Co-authored-by: Eric Schweitz + +commit 89275300d861aef73225428c95fdb069de36186d +Author: Valentin Clement +Date: Tue Feb 1 15:26:47 2022 +0100 + + [flang] Add lowering for basic empty SUBROUTINE + + This patch adds the ability to lower an empty subroutine. + + Reviewed By: kiranchandramohan + + Differential Revision: https://reviews.llvm.org/D118695 + +commit 990759136a268fa51695d04f845ce7f9b36a842f +Author: Valentin Clement +Date: Tue Feb 1 13:49:49 2022 +0100 + + [flang] Add lowering placeholders + + This patch puts in place the differents + function to lower the evaluation list. All functions + are just placholders with TODOs for now. + Follow up patches will bring the proper lowering in these + functions. + + Reviewed By: jeanPerier + + Differential Revision: https://reviews.llvm.org/D118678 + +commit e1a12767ee628e179efc8733449f98018a686b4d +Author: Valentin Clement +Date: Fri Jan 28 22:39:44 2022 +0100 + + [flang] Initial lowering for empty program + + This patch enable lowering from Fortran to FIR for a basic empty + program. It brings all the infrastructure needed for that. As discussed + previously, this is the first patch for lowering and follow up patches + should be smaller. + + With this patch we can lower the following code: + + ``` + program basic + end program + ``` + + To a the FIR equivalent: + + ``` + func @_QQmain() { + return + } + ``` + + Follow up patch will add lowering of more complex constructs. 
+ + Reviewed By: kiranchandramohan, schweitz, PeteSteinfeld + + Differential Revision: https://reviews.llvm.org/D118436 + +commit 1f8790050b0e99e7b46cc69518aa84f46f50738e +Author: Tim Keith +Date: Sat Mar 28 21:00:16 2020 -0700 + + [flang] Reformat with latest clang-format and .clang-format + + Original-commit: flang-compiler/f18@9fe84f45d7fd685051004678d6b5775dcc4c6f8f + Reviewed-on: https://github.com/flang-compiler/f18/pull/1094 + +commit 64ab3302d5a130c00b66a6957b2e7f0c9b9c537d +Author: CarolineConcatto <51754594+CarolineConcatto@users.noreply.github.com> +Date: Tue Feb 25 15:11:52 2020 +0000 + + [flang] [LLVMify F18] Compiler module folders should have capitalised names (flang-compiler/f18#980) + + This patch renames the modules in f18 to use a capital letter in the + module name + + Signed-off-by: Caroline Concatto + + Original-commit: flang-compiler/f18@d2eb7a1c443d1539ef12b6f027074a0eb15b1ea0 + Reviewed-on: https://github.com/flang-compiler/f18/pull/980 From fb313786317fce94815b932c5a4b023425a29a73 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 27 Jan 2026 21:27:10 +0100 Subject: [PATCH 61/64] Test is passing --- openmp/runtime/test/transform/interchange/taskloop-intdo.f90 | 2 -- 1 file changed, 2 deletions(-) diff --git a/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 b/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 index d79f92d2ad074..80e0ee62e020b 100644 --- a/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 +++ b/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 @@ -1,5 +1,3 @@ - -! XFAIL: * ! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe ! 
RUN: %t.exe | FileCheck %s --match-full-lines From 2c814ca1875f0c9065a5ee553ee0d8455872774f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 28 Jan 2026 10:57:25 +0100 Subject: [PATCH 62/64] Post- merge fix --- flang/lib/Lower/Bridge.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 1783786d3c8f1..db1af34da2b38 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2392,8 +2392,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { &loopControl->u)) { // Non-concurrent increment loop. IncrementLoopInfo &info = incrementLoopNestInfo.emplace_back( - *bounds->name.thing.symbol, bounds->lower, bounds->upper, - bounds->step); + *bounds->Name().thing.symbol, bounds->Lower(), bounds->Upper(), + bounds->Step()); if (unstructuredContext) { maybeStartBlock(preheaderBlock); info.hasRealControl = info.loopVariableSym->GetType()->IsNumeric( From dd4e8037f6b6cced5b2589d0687a7b1ddba5fc6d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 28 Feb 2026 00:32:10 +0100 Subject: [PATCH 63/64] Post-merge fixes --- flang/lib/Semantics/check-omp-loop.cpp | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp index 03996e05e0be7..56dea98d8065b 100644 --- a/flang/lib/Semantics/check-omp-loop.cpp +++ b/flang/lib/Semantics/check-omp-loop.cpp @@ -186,22 +186,7 @@ void OmpStructureChecker::HasInvalidLoopBinding( } } -static bool IsLoopTransforming(llvm::omp::Directive dir) { - switch (dir) { - // TODO case llvm::omp::Directive::OMPD_flatten: - case llvm::omp::Directive::OMPD_fuse: - case llvm::omp::Directive::OMPD_interchange: - case llvm::omp::Directive::OMPD_nothing: - case llvm::omp::Directive::OMPD_reverse: - // TODO case llvm::omp::Directive::OMPD_split: - case llvm::omp::Directive::OMPD_stripe: - case llvm::omp::Directive::OMPD_tile: - case 
llvm::omp::Directive::OMPD_unroll: - return true; - default: - return false; - } -} + void OmpStructureChecker::CheckSIMDNest(const parser::OpenMPConstruct &c) { // Check the following: From acab86d21ebd6843d439653be3ad2566a53403fb Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sun, 1 Mar 2026 00:36:59 +0100 Subject: [PATCH 64/64] clang-format --- flang/lib/Semantics/check-omp-loop.cpp | 2 - log.log | 12746 ----------------------- 2 files changed, 12748 deletions(-) delete mode 100644 log.log diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp index 6e698f2d350ab..ec48d26046a6e 100644 --- a/flang/lib/Semantics/check-omp-loop.cpp +++ b/flang/lib/Semantics/check-omp-loop.cpp @@ -186,8 +186,6 @@ void OmpStructureChecker::HasInvalidLoopBinding( } } - - void OmpStructureChecker::CheckSIMDNest(const parser::OpenMPConstruct &c) { // Check the following: // The only OpenMP constructs that can be encountered during execution of diff --git a/log.log b/log.log deleted file mode 100644 index 7d8ffe7afc11f..0000000000000 --- a/log.log +++ /dev/null @@ -1,12746 +0,0 @@ -commit 9be7c1037f26146e469c85061d6685a9172c5de9 -Author: Tom Eccles -Date: Mon Jan 26 14:06:46 2026 +0000 - - [flang][Lower] Fix UB in location handling (#177944) - - Previously `prov` received the address of a variable allocated in stack - memory (the contents of `include`). `prov` would then access that memory - outside of the lifetime of that stack allocation: leading to UB. - - This only manifested on thinLTO builds. No added test because - flang/test/Lower/location.f90 covers it (when thinLTO is enabled) and - there are bots guarding the thin-lto configuration. - - Fixes #156629 - Fixes #176404 - -commit 2f1b1f3a543f79774aa5463c956404d330143a06 -Author: Chi-Chun, Chen -Date: Sat Jan 24 13:19:13 2026 -0600 - - [flang][mlir][OpenMP] Support inbranch and notinbranch clause (#177310) - - Support inbranch and notinbranch clause for OpenMP declare simd - directive. 
- -commit 1036d782ae83b37578d6a810544a30f7453b79e9 -Author: Andre Kuhlenschmidt -Date: Thu Jan 22 08:58:10 2026 -0800 - - [flang][lowering] lowering assigned goto of allocatable variable (#175874) - - towards [#173594](https://github.com/llvm/llvm-project/issues/173594) - - This PR adds a lowering for `AssignedGoto`s that reference an - allocatable variable. - -commit 7a74e7fba33c8e016c79f0b4fa55ed908061019f -Author: Abid Qadeer -Date: Wed Jan 21 13:40:01 2026 +0000 - - [flang][OpenMP] Fix mapping of constant arrays. (#176763) - - The compiler skips mapping of named constants (parameters) to OpenMP - target regions under the assumption that constants don't need to be - mapped. This assumption is not valid when array is accessed inside with - dynamic index. The problem can be seen with the following code: - - ``` - module fir_lowering_check - implicit none - - integer, parameter :: dp = selected_real_kind(15, 307) - real(dp), parameter :: arrays(2) = (/ 0.0, 0.0 /) - - contains - - subroutine test(hold) - - integer, intent(in) :: hold - integer :: z - real(dp) :: temp - - !$omp target teams distribute parallel do - do z = 1, 2 - temp = arrays(hold) - end do - !$omp end target teams distribute parallel do - - end subroutine test - end module fir_lowering_check - - program main - use fir_lowering_check - - implicit none - integer :: hold - hold = 1 - call test(hold) - print *, "Finished" - - end program main - ``` - - It fails with the following error - `'hlfir.designate' op using value defined outside the region` - - The fix is to allow mapping of constant arrays and map them as `to`. - -commit d13119f26999533a3048db3db058e19ae02a76aa -Author: Chi-Chun, Chen -Date: Tue Jan 20 11:08:01 2026 -0600 - - [flang][mlir][OpenMP] Add support for uniform clause in declare simd (#176046) - - Define OpenMP uniform clause in mlir and emit it from flang. 
- -commit d542fac6b16406ec0ed0e168e6c3f4c6be28cff8 -Author: Krzysztof Parzyszek -Date: Tue Jan 20 09:57:35 2026 -0600 - - [flang] Add traits to more AST nodes (#175578) - - Follow-up to PR175211. - - There are still a few AST nodes that don't have any of the standard - traits (Wrapper/Tuple/etc). Because of that they require special - handling in the parse tree visitor. - - Convert a subset of these nodes to the typical format, and remove the - special cases from the parse tree visitor. - - The members of these nodes were frequently used, so instead of - extracting them by hand each time use helper member functions to access - them. - -commit b8fec8ebc632af1627c2c1d88b8c8b8957323a03 -Author: Krzysztof Parzyszek -Date: Tue Jan 20 08:00:00 2026 -0600 - - [flang] Add traits to more AST nodes (#175566) - - Follow-up to PR175211. - - There are still a few AST nodes that don't have any of the standard - traits (Wrapper/Tuple/etc). Because of that they require special - handling in the parse tree visitor. - - Convert a subset of these nodes to the typical format, and remove the - special cases from the parse tree visitor. - -commit 1c6d2add766288e2d0d2b089a2b66f7b0f285141 -Author: Chi-Chun, Chen -Date: Fri Jan 16 10:51:27 2026 -0600 - - [OpenMP][Flang][MLIR] Introduce omp.declare_simd op and emit from Flang (#175604) - - Changes: - - Adds a new `omp.declare_simd` operation to the OpenMP MLIR dialect - - Lowers Fortran `!$omp declare simd` into `omp.declare_simd` inside the - enclosing function body - - mlir to LLVMIR translation and uniform clause will be added in follow-up - PRs. 
- -commit b86c84ce6381bc1037f3893b063aa019af0fb8eb -Author: Kelvin Li -Date: Fri Jan 16 10:12:02 2026 -0500 - - [flang] Handle unused variable (NFC) (#176274) - -commit 1d4f9ac37c043198d823e85e3cd777dc970d8b75 -Author: Jean-Didier PAILLEUX -Date: Thu Jan 15 18:02:07 2026 +0100 - - [flang] Fix crash with coarray teams #171048 (#172259) - - This PR updates the `CHANGE TEAM` construct to fix the bug mentioned in - the issue #171048. - When a construct such as `IfConstruct` was present in the `CHANGE TEAM` - region, several BB were created but outside the region. - -commit ccbe36f16d88fc6608efb381ecfc8904a1f55437 -Author: khaki3 <47756807+khaki3@users.noreply.github.com> -Date: Mon Jan 12 09:45:47 2026 -0800 - - [flang][acc] Implement cache directive lowering (#174897) - - The `acc.cache` operation is currently defined to be associated with a - loop. However, this implementation generates `acc.cache` as a standalone - data entry operation inside the loop body. The `acc.cache` operation - definition should be updated in a future change to reflect this usage. 
- - Key implementation details: - - Add semantic checks to validate cache-specific constraints: at least - one bound must be specified for array sections, and only unit stride - is supported - - Use the shared `gatherDataOperandAddrAndBounds` infrastructure to - generate `acc.bounds` for cache operands, handling single elements - (`arr(i)`), full ranges (`arr(l:u)`), and partial ranges with missing - bounds (`arr(l:)` or `arr(:u)`) - - Set the data clause to `acc_cache` with the `readonly` modifier via - the `modifiers` attribute when the `readonly` clause is present - - Update the symbol map so subsequent lowering uses the cache result - - Insert cache operations after loop iterator setup - - Add symbol scope management for constructs inside `acc.loop` - -commit 6bfa042a10a04379261e35a710caadb1c53457c5 -Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> -Date: Fri Jan 9 06:51:29 2026 -0800 - - [flang][mlir] Add checks and test for linear clause on omp.wsloop and omp.simd (#174916) - - This PR adds additional checks and tests for linear clause on omp.wsloop - and omp.simd (both standalone and composite). For composite simd - constructs, the translation to LLVMIR uses the same - `LinearClauseProcessor` under `convertOmpSimd`, as already present in - previous PRs like https://github.com/llvm/llvm-project/pull/150386 and - https://github.com/llvm/llvm-project/pull/139386 - -commit 568b8e4873b2d04be7ab302246c8e3986ea61176 -Author: Krzysztof Parzyszek -Date: Thu Jan 8 18:26:48 2026 -0600 - - [flang] Add traits to several AST nodes (#175065) - - There are quite a few AST nodes that don't have any of the standard - traits (Wrapper/Tuple/etc). Because of that they require special - handling in the parse tree visitor. - - Convert a subset of these nodes to the typical format, and remove the - special cases from the parse tree visitor. 
- -commit 3f5d91bfbc17a487fc14ac2c7f2d866fb97e3906 -Author: Chi-Chun, Chen -Date: Tue Jan 6 11:10:03 2026 -0600 - - [Flang][OpenMP] Implement device clause lowering for target directive (#173509) - - Add lowering support for the OpenMP `device` clause on the `target` - directive in Flang. - - The device expression is propagated through MLIR OpenMP and passed to - the host-side `__tgt_target_kernel` call. - -commit 316a9c52f022024978775c9af40ba829d0564888 -Author: Abid Qadeer -Date: Mon Jan 5 14:46:49 2026 +0000 - - [flang] Ignore ambiguous use statement in use_stmt generation. (#174387) - - The https://github.com/llvm/llvm-project/pull/168106 caused build - failures in testcases which have ambiguous use statements. This PR fixes - that by properly ignoring them in `emitUseStatementsFromFunit`. - -commit 212527c00ba60aa5677a1b1acdd0f15b32b8fd01 -Author: Thirumalai Shaktivel <74826228+Thirumalai-Shaktivel@users.noreply.github.com> -Date: Mon Jan 5 13:24:10 2026 +0530 - - [Flang] Add FIR and LLVM lowering support for prefetch directive (#167272) - - Implementation details: - * Add PrefetchOp in FirOps - * Handle PrefetchOp in FIR Lowering and also pass required default - values - * Handle PrefetchOp in CodeGen.cpp - * Add required tests - -commit 11d9694b757b2e2c9f5169967fcc85f25f9a5645 -Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> -Date: Sat Jan 3 21:37:43 2026 -0800 - - [flang][mlir] Add support for implicit linearization in omp.simd (#150386) - - Up till OpenMP version 4.5, the loop iteration variable in the - associated do-construct of simd is linear with a linear step equal to - the increment of the loop. This PR implements this functionality. For - versions > 4.5, such an implicit linear clause is not assumed for the - loop iteration variable. 
- - Fixes https://github.com/llvm/llvm-project/issues/171006 - -commit c4387734322b9c050861454f799438346807c7b0 -Author: Victor Chernyakin -Date: Fri Jan 2 21:42:56 2026 -0700 - - [LLVM][ADT] Migrate users of `make_scope_exit` to CTAD (#174030) - - This is a followup to #173131, which introduced the CTAD functionality. - -commit fc9e6e13fd4bb7365e4b9659c08c3440688217ce -Author: Abid Qadeer -Date: Fri Jan 2 12:10:18 2026 +0000 - - [flang] Represent use statement in fir. (#168106) - - We have a longstanding issue in debug info that use statement is not - fully respected. The problem has been described in - https://github.com/llvm/llvm-project/issues/160923. This is first part - of the effort to address this issue. This PR adds infrastructure to emit - `use` statement information in FIR, which will be used by subsequent - patches to generate DWARF debug information. - - The information about use statement is collected during semantic - analysis and stored in `PreservedUseStmt` objects. During lowering, - `fir.use_stmt` operations are emitted for each `PreservedUseStmt` - object. The `fir.use_stmt` operation captures the module name, `only` - list symbols, and any renames specified in the use statement. The - `fir.use_stmt` is removed during `CodeGen`. - -commit 755f298ddcd43045be6eec1a9f15dc7ba820eecd -Author: Krzysztof Parzyszek -Date: Thu Dec 18 08:04:28 2025 -0600 - - [flang][OpenMP] Implement COMBINER clause (#172036) - - This adds parsing and lowering of the COMBINER clause. It utilizes the - existing lowering code for combiner-expression to lower the COMBINER - clause as well. - -commit 1451f3d9b008c76d66c215e5fb4ec3dde0f0d6ca -Author: Krzysztof Parzyszek -Date: Fri Dec 12 08:09:25 2025 -0600 - - [flang][OpenMP] Use StylizedInstance in converted clauses (#171907) - - Invent `StylizedInstance` class to store special variables together with - the instantiated expression in omp::clause::Initializer. 
This will - eliminate the need for visiting the original AST nodes in lowering to - MLIR. - -commit 3a0c006054f79dbb2c921003898b7242a37a38cb -Author: Tom Eccles -Date: Wed Dec 10 16:51:17 2025 +0000 - - Revert "[flang][OpenMP] Fix firstprivate not working with lastprivate in DO SIMD" (#171646) - - Reverts llvm/llvm-project#170163 - - Regression in fujitsu test suite - -commit 748e7af8dd6e9b4683a6402a0ca6598fe23a9c1e -Author: Krish Gupta -Date: Tue Dec 9 20:40:21 2025 +0530 - - [flang][OpenMP] Fix firstprivate not working with lastprivate in DO SIMD (#170163) - - This fixes a bug where firstprivate was ignored when the same variable - had both firstprivate and lastprivate clauses in a do simd construct. - - What was broken: - ``` - integer :: a - a = 10 - !$omp do simd firstprivate(a) lastprivate(a) - do i = 1, 1 - print *, a ! Should print 10, but printed garbage/0 - a = 20 - end do - !$omp end do simd - print *, a ! Correctly prints 20 - ``` - - Inside the loop, [a] wasn't being initialized from the firstprivate - clause—it just had whatever uninitialized value was there. - - The fix: - - In genCompositeDoSimd(), we were using simdItemDSP to handle - privatization for the whole loop nest. This only looked at SIMD clauses - and missed the firstprivate from the DO part. Changed it to use - wsloopItemDSP instead, which handles both DO clauses (firstprivate, - lastprivate) correctly. - - One line change in OpenMP.cpp - - Tests added: - - Lowering test to check MLIR generation - Runtime test to verify the actual values are correct - image - - - Fixes #168306 - - --------- - - Co-authored-by: Krish Gupta - -commit b360a782ca5da938d5e4f7c791508932a0bfc328 -Author: Akash Banerjee -Date: Fri Dec 5 17:38:41 2025 +0000 - - Reland "[Flang][OpenMP] Add lowering support for is_device_ptr clause (#169331)" (#170851) - - Add support for OpenMP is_device_ptr clause for target directives. 
- - [MLIR][OpenMP] Add OpenMPToLLVMIRTranslation support for is_device_ptr - #169367 This PR adds support for the OpenMP is_device_ptr clause in the - MLIR to LLVM IR translation for target regions. The is_device_ptr clause - allows device pointers (allocated via OpenMP runtime APIs) to be used - directly in target regions without implicit mapping. - -commit 290b32a699aefbd1f18fe78351655dd42ce98f1e -Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> -Date: Thu Dec 4 20:39:17 2025 -0800 - - [llvm][mlir][OpenMP] Support translation for linear clause in omp.wsloop and omp.simd (#139386) - - This patch adds support for LLVM translation of linear clause on - omp.wsloop (except for linear modifiers). - -commit be79a0d90ff700b717e72d7c200f58c5918e4301 -Author: theRonShark -Date: Thu Dec 4 19:38:16 2025 -0500 - - Revert "[Flang][OpenMP] Add lowering support for is_device_ptr clause" (#170778) - - Reverts llvm/llvm-project#169331 - -commit a77c4948a5681984accd3c6d35fb51c1c5571a50 -Author: Akash Banerjee -Date: Thu Dec 4 15:57:24 2025 +0000 - - [Flang][OpenMP] Add lowering support for is_device_ptr clause (#169331) - - Add support for OpenMP is_device_ptr clause for target directives. - - [MLIR][OpenMP] Add OpenMPToLLVMIRTranslation support for is_device_ptr #169367 - This PR adds support for the OpenMP is_device_ptr clause in the MLIR to LLVM IR translation for target regions. The is_device_ptr clause allows device pointers (allocated via OpenMP runtime APIs) to be used directly in target regions without implicit mapping. - -commit 5ccf8c90d1e4020d5f9bc255fe521aa0763f2b2b -Author: Tom Eccles -Date: Wed Dec 3 09:36:22 2025 +0000 - - [flang] implement VECTOR VECTORLENGTH directive (#170114) - - This should match exactly the llvm attributes generated by classic - flang. 
- -commit a09571ed5be3054b546b714c62c078b595d2f1cd -Author: jeanPerier -Date: Tue Dec 2 10:13:23 2025 +0100 - - [flang] represent ABSTRACT in fir.type_info (#170109) - - This patch keeps information about ABSTRACT derived types and DEFERRED - type bound procedures inside fir.type_info dispatch tables. - - This is part of the effort to delay generation of runtime type info - global by keeping the type information in a more condense fashion inside - fir.type_info (which is also easier to use for any potential - optimizations). - -commit d989ff93e2a073cb921cfcfeb9728a0b51892f1a -Author: Jan Leyonberg -Date: Fri Nov 28 09:00:18 2025 -0500 - - [flang][OpenMP] Add lowering of subroutine calls in custom reduction combiners (#169808) - - This patch adds support for lowering subroutine calls in custom - reduction combiners to MLIR. - -commit fd22706e937f7d2563cfa0e433dd735cc5284599 -Author: Krzysztof Parzyszek -Date: Tue Nov 25 17:30:28 2025 -0600 - - [flang][OpenMP] Skip compiler directives in getCollapsedLoopEval (#169565) - - Use `getNestedDoConstruct` from Utils to get the nested DoConstructs. - - Fixes https://github.com/llvm/llvm-project/issues/169532 - -commit 3e86f056217afbe46cd515b3d3c2f1dc7664bebf -Author: Jan Leyonberg -Date: Mon Nov 24 16:00:46 2025 -0500 - - [OpenMP][flang] Lowering of OpenMP custom reductions to MLIR (#168417) - - This patch add support for lowering of custom reductions to MLIR. It - also enhances the capability of the pass to automatically mark functions - as "declare target" by traversing custom reduction initializers and - combiners. - -commit c2d659b9b8efac9f80b8ebcb2b38b61295d82bdc -Author: Krzysztof Parzyszek -Date: Sat Nov 22 12:28:58 2025 -0600 - - [flang][OpenMP] Implement loop nest parser (#168884) - - Previously, loop constructs were parsed in a piece-wise manner: the - begin directive, the body, and the end directive were parsed separately. - Later on in canonicalization they were all coalesced into a loop - construct. 
To facilitate that end-loop directives were given a special - treatment, namely they were parsed as OpenMP constructs. As a result - syntax errors caused by misplaced end-loop directives were handled - differently from those cause by misplaced non-loop end directives. - - The new loop nest parser constructs the complete loop construct, - removing the need for the canonicalization step. Additionally, it is the - basis for parsing loop-sequence-associated constructs in the future. - - It also removes the need for the special treatment of end-loop - directives. While this patch temporarily degrades the error messaging - for misplaced end-loop directives, it enables uniform handling of any - misplaced end-directives in the future. - -commit 8be46410248f8298af1f12be1c52e2824ce25951 -Author: Eugene Epshteyn -Date: Fri Nov 21 12:03:51 2025 -0500 - - [flang] Use hlfir.cmpchar for SELECT CASE of charsSelect case hlfir cmpchar (#168476) - - For SELECT CASE with character selector, instead of allways calling - runtime comparison function, emit hlfir.cmpchar. This has different - behaviors at different optimization levels: at -O0, it still emits - flang-rt call, but at higher optimization levels it does inline - comparison. Modify test/Lower/select-case-statement.f90 to test both - comparison cases. - -commit f4ebee0ca980f807de32841288b3785dadbc471d -Author: Ferran Toda -Date: Fri Nov 21 15:16:30 2025 +0100 - - [Flang][OpenMP] Add semantic support for Loop Sequences and OpenMP loop fuse (#161213) - - This patch adds semantics for the `omp fuse` directive in flang, as - specified in OpenMP 6.0. This patch also enables semantic support for - loop sequences which are needed for the fuse directive along with - semantics for the `looprange` clause. These changes are only semantic. - Relevant tests have been added , and previous behavior is retained with - no changes. 
- - --------- - - Co-authored-by: Ferran Toda - Co-authored-by: Krzysztof Parzyszek - -commit d69320e775a7c4af8f6e6bb6bd0574ead8e3d69c -Author: Krzysztof Parzyszek -Date: Fri Nov 21 07:40:44 2025 -0600 - - [OpenMP] Introduce "loop sequence" as directive association (#168934) - - OpenMP 6.0 introduced a `fuse` directive, and with it a "loop sequence" - as the associated code. What used to be "loop association" has become - "loop-nest association". - - Rename Association::Loop to LoopNest, add Association::LoopSeq to - represent the "loop sequence" association. - - Change the association of fuse from "block" to "loop sequence". - -commit 364fe55c42aaac63b2a28e54fa4e31cc6efcf4a8 -Author: jeanPerier -Date: Thu Nov 20 15:37:53 2025 +0100 - - [flang] simplify pointer assignments (#168732) - - Pointer assignment lowering was done in different ways depending on - contexts and types, sometimes still using runtime calls when this is not - needed and the complexity of doing this inline is very limited (the - pointer and target descriptors were already prepared inline, the runtime - is just doing the descriptor assignment and ensuring the pointer - descriptor keep its pointer flag). - - Slightly extent the inline version that was used for Forall and use it - for all cases. - When lowering without HLFIR is removed, this will allow removing more - code. - -commit 8c674f04aa57766bbc7fac97c1e42526b22a95a4 -Author: Akash Banerjee -Date: Mon Nov 17 17:18:12 2025 +0000 - - [OpenMP][Flang] Change the OmpDefaultMapperName suffix (#168399) - - This PR fixes a Fortran syntax violation in the OpenMP default mapper - naming convention. The suffix .omp.default.mapper contains dots which - are invalid in Fortran identifiers, causing failures when mappers are - written to and read from module files. The fix changes the suffix to - _omp_default_mapper which uses underscores instead of dots, complying - with Fortran syntax rules. 
- - Key changes: - - - Changed OmpDefaultMapperName constant from .omp.default.mapper to - _omp_default_mapper - - Added GetUltimate() calls in mapper symbol resolution to properly - handle symbols across module boundaries - - Added new test case verifying default mappers work correctly when - defined in a module and used in consuming programs - - This fixes #168336. - -commit e70e9ec3b83757761ccbba217a566d77b561ec53 -Author: Krzysztof Parzyszek -Date: Mon Nov 17 08:02:36 2025 -0600 - - [flang][OpenMP] Store Block in OpenMPLoopConstruct, add access functions (#168078) - - Instead of storing a variant with specific types, store parser::Block as - the body. Add two access functions to make the traversal of the nest - simpler. - - This will allow storing loop-nest sequences in the future. - -commit 8aa7d823b0cba96e54d4d73539df4b82c3b401b9 -Author: Akash Banerjee -Date: Fri Nov 14 15:59:48 2025 +0000 - - [OpenMP][Flang] Emit default declare mappers implicitly for derived types (#140562) - - This patch adds support to emit default declare mappers for implicit - mapping of derived types when not supplied by user. This especially - helps tackle mapping of allocatables of derived types. - -commit 3b83e7fa4ec18991a25d49741418e2b3d325692c -Author: Jean-Didier PAILLEUX -Date: Fri Nov 14 14:06:46 2025 +0100 - - [flang] Implement !DIR$ IVDEP directive (#133728) - - This directive tells the compiler to ignore vector dependencies in the - following loop and it must be placed before a `do loop`. - - Sometimes the compiler may not have sufficient information to decide - whether a particular loop is vectorizable due to potential dependencies - between iterations and the directive is here to tell to the compiler - that vectorization is safe with `parallelAccesses` metadata. 
- - This directive is also equivalent to `#pragma clang loop assume(safety)` - in C++ - -commit 056f744789ce3cc0b2e3ac1451f5bb32cc6e133e -Author: Eugene Epshteyn -Date: Fri Nov 14 07:05:08 2025 -0500 - - [flang] Removed old option -fdebug-dump-pre-fir (#168008) - - This option has long been replaced by `-fc1 -fdebug-dump-pft`. Removed - the old option and updated one test that still used it. - -commit 833ffa54f25f4e6716bfd95920a08c6c8abf4b56 -Author: Akash Banerjee -Date: Thu Nov 13 19:21:34 2025 +0000 - - [Flang][OpenMP] Update declare mapper lookup via use-module (#167903) - -commit e1324a93778624661345229f3acfe258bc495d95 -Author: Akash Banerjee -Date: Thu Nov 13 16:05:33 2025 +0000 - - Revert "[Flang][OpenMP] Update declare mapper lookup via use-module" (#167896) - - Reverts llvm/llvm-project#163860 - -commit bb5f3a08b6ee7baeab6cc4635a9240a8b9dbeb9e -Author: Akash Banerjee -Date: Thu Nov 13 15:07:46 2025 +0000 - - [Flang][OpenMP] Update declare mapper lookup via use-module (#163860) - - - Implemented semantic TODO to catch undeclared mappers. - - Fix mapper lookup to include modules imported through USE. - - Update and add tests. - - Fixes #163385. - -commit 7838dbee3a307cd8bd129ee8dbb998209133bffe -Author: Jack Styles -Date: Wed Nov 12 13:15:34 2025 +0000 - - [Flang][OpenMP] Add Lowering support for Collapse with Taskloop (#166791) - - Support for lowering collapse already exists within - `genLoopNestClauses`, which is called when lowering taskloop. However, - the TODO message still included the Collapse clause, so it was not - activated. By removing this, it enables lowering of the Collapse clause - in taskloop. - -commit cfc56c982fe144455db25f20576c9297fc68a8f9 -Author: Abid Qadeer -Date: Wed Nov 12 10:21:32 2025 +0000 - - [flang][debug] Track dummy argument positions explicitly. (#167489) - - CHARACTER dummy arguments were treated as local variables in debug info. - This happened because our method to get the argument number was not - robust. 
It relied on `DeclareOp` having a direct reference to arguments - which was not the case for character arguments. This is fixed by storing - source-level argument positions in `DeclareOp`. - - Fixes #112886 - -commit d02a5ae10bab57be80bd460d10e6bb30959bdacc -Author: Jean-Didier PAILLEUX -Date: Wed Nov 12 09:40:04 2025 +0100 - - [flang] Adding lowering of TEAMs features to PRIF in MIF Dialect (#165573) - - Support for multi-image features has begun to be integrated into LLVM - with the MIF dialect. - In this PR, you will find lowering and operations related to the TEAM - features (`SYNC TEAM`, `GET_TEAM`, `FORM TEAM`, `CHANGE TEAM`, - `TEAM_NUMBER`). - - Note regarding the operation for `CHANGE TEAM` : This operation is - partial because it does not support the associated list of coarrays - because the allocation of a coarray and the lowering of PRIF's - `prif_alias_{create|destroy}` procedures are not yet supported in Flang. - This will be integrated later. - - Any feedback is welcome. - -commit eb614cda37bdf14d5371f6b41a475c68c9a7fdec -Author: Kaviya Rajendiran <67495422+kaviya2510@users.noreply.github.com> -Date: Tue Nov 11 21:02:58 2025 +0530 - - [Flang][OpenMP][MLIR] Lowering of reduction,inreduction, nogroup and lastprivate clause to MLIR (#166751) - - This patch add MLIR lowering support for nogroup, reduction, inreduction - and lastprivate clauses of taskloop directive. - -commit faf9ac0f6fc284e26515c55787cefd6ec807ab36 -Author: Kaviya Rajendiran <67495422+kaviya2510@users.noreply.github.com> -Date: Tue Nov 11 09:49:14 2025 +0530 - - [Flang][MLIR][OpenMP] Add MLIR lowering support for taskloop clauses. (#165851) - - This patch add MLIR lowering support for the following taskloop clauses: - - 1. Default clause - 2. Shared clause - 3. Allocate clause - 4. Final clause - 5. If clause - 6. Mergeable clause - 7. Priority clause - 8. 
Untied clause - -commit 86fa018a1dca99a1a199e1a0e6f5730546198824 -Author: agozillon -Date: Tue Nov 11 03:15:58 2025 +0100 - - [Flang][OpenMP] Initial defaultmap(none) implementation (#166715) - - This PR adds defaultmap(none) behaviour to Flang, where we emit a - semantic error if variables within the target construct do not have an - associated data attribute. Similar to the way default behaves, as - described by the OpenMP specification. - -commit cf1f871023e432837581b84c8563f3b0690dd9d3 -Author: Thirumalai Shaktivel <74826228+Thirumalai-Shaktivel@users.noreply.github.com> -Date: Mon Nov 10 09:44:22 2025 +0530 - - [Flang] Add parser support for prefetch directive (#139702) - - Implementation details: - * Recognize prefetch directive in the parser as `!dir$ prefetch ...` - * Unparse the prefetch directive - * Add required tests - - Details on the prefetch directive: - `!dir$ prefetch designator[, designator]...`, where the designator list - can be a variable or an array reference. This directive is used to - insert a hint to the code generator to prefetch instructions for - memory references. - -commit 3d3fab17f5ea8a14eb390f53075c094f5e1f19fa -Author: Krzysztof Parzyszek -Date: Mon Nov 3 07:37:13 2025 -0600 - - [flang][OpenMP] Use OmpDirectiveSpecification in ALLOCATE (#165865) - - The ALLOCATE directive has two forms: - - A declarative form with a standalone directive: - ``` - !$OMP ALLOCATE (variable-list-item...) - ``` - - An executable form that consists of several directives followed by an - ALLOCATE statement: - ``` - !$OMP ALLOCATE (variable-list-item...) - !$OMP ALLOCATE (variable-list-item...) - ... - ALLOCATE (...) - ``` - - The second form was deprecated in OpenMP 5.2 in favor of the ALLOCATORS - construct. - - Since in the parse tree every type corresponding to a directive only - corresponds to a single directive, the executable form is represented by - a sequence of nested OmpAllocateDirectives, e.g. 
- ``` - !$OMP ALLOCATE(x) - !$OMP ALLOCATE(y) - ALLOCATE(x, y) - ``` - will become - ``` - OmpAllocateDirective - |- ALLOCATE(x) // begin directive - `- OmpAllocateDirective // block - |- ALLOCATE(y) // begin directive - `- ALLOCATE(x, y) // block - ``` - With this change all AST nodes for directives use - OmpDirectiveSpecification as the directive representation. - -commit 63e45ef1bfc0208048fcd9d4264e4e94f508733f -Author: Daniel Chen -Date: Fri Oct 31 09:49:50 2025 -0400 - - To fix polymorphic pointer assignment in FORALL when LHS is unlimited polymorphic and RHS is intrinsic type target (#164999) - - Fixes #143569. - -commit c1779f33bdada6e478e882cc23a647ef9abaad96 -Author: Jean-Didier PAILLEUX -Date: Tue Oct 28 08:02:15 2025 +0100 - - [flang] Implement !DIR$ [NO]INLINE and FORCEINLINE directives (#134350) - - This patch adds the support of these two directives : `!dir$ inline` and - `!dir$ noinline`. - - `!dir$ noinline` tells to the compiler to not perform inlining on - specific function calls by adding the `noinline` metadata on the call. - - `!dir$ inline` tells to the compiler to attempt inlining on specific - function calls by adding the `inlinehint` metadata on the call. - - `!dir$ forceinline` tells to the compiler to always perfom inlining on - specific function calls by adding the `alwaysinline` metadata on the - call. - - Currently, these directives can be placed before a `DO LOOP`, call - functions or assignments. Maybe other statements can be added in the - future if needed. - - For the `inline` directive the correct name might be `forceinline` but - I'm not sure ? - -commit 23ead476550a667d532554e966704494173fd9d7 -Author: Jakub Kuderski -Date: Wed Oct 22 12:47:48 2025 -0400 - - [flang][mlir] Migrate to free create functions. NFC. (#164657) - - See - https://discourse.llvm.org/t/psa-opty-create-now-with-100-more-tab-complete/87339. - - I plan to mark these as deprecated in - https://github.com/llvm/llvm-project/pull/164649. 
- -commit 2dbe9592663a701546efd1ec1396417629542e4b -Author: Daniel Chen -Date: Wed Oct 22 10:24:39 2025 -0400 - - Get the BoxType from the RHS instead of LHS for polymorphic pointer assignment inside FORALL. (#164279) - - Fixes #153220 - -commit c9fb37c75f741f1179f2d2c661d27d36645b0310 -Author: jeanPerier -Date: Wed Oct 22 11:46:18 2025 +0200 - - [flang][FIR] add fir.assumed_size_extent to abstract assumed-size extent encoding (#164452) - - The purpose of this patch is to allow converting FIR array representation to - memref when possible without hitting memref verifier issue. - - The issue was that FIR arrays may be assumed size, in which case the - last dimension will not be known at runtime. Flang uses -1 to encode - this to fulfill Fortran 2023 standard requirements in 18.5.3 point 5 - about CFI_desc_t. - - When arrays are converted to memeref, if this `-1` reaches memeref - operations, it triggers verifier errors (even if the conversion happened - in code that guards the code to be entered at runtime if the array is - assumed-size because folders/verifiers do not take into account - reachability). - - This follows-up on discussions in #163505 merge requests - -commit f2b20d3410e4c0cc3be4a5b69e00120cab9f1d5e -Author: agozillon -Date: Tue Oct 21 21:54:25 2025 +0200 - - [Flang][OpenMP][Dialect] Swap to using MLIR dialect enum to encode map flags (#164043) - - This PR shifts from using the LLVM OpenMP enumerator bit flags to an - OpenMP dialect specific enumerator. This allows us to better represent - map types that wouldn't be of interest to the LLVM backend and runtime - in the dialect. - - Primarily things like - ref_ptr/ref_ptee/ref_ptr_ptee/atach_none/attach_always/attach_auto which - are of interest to the compiler for certrain transformations (primarily - in the FIR transformation passes dealing with mapping), but the runtime - has no need to know about them. 
It also means if another OpenMP - implementation comes along they won't need to stick to the same bit flag - system LLVM chose/do leg work to address it. - -commit 5cd9f0f655ac2ab9da4fbd049fbcba6eb0d793b9 -Author: Peter Klausler -Date: Mon Oct 20 13:20:33 2025 -0700 - - [flang] Move parse tree tool to Parser/tools.h (#163998) - - Move the parse tree utility function - semantics::getDesignatorNameIfDataRef to Parser/tools.h and rename it to - comply with the local style. - -commit 7d25ba39c8ac4a08c30620463bdc5f586b43c1cd -Author: Kazu Hirata -Date: Fri Oct 17 07:27:28 2025 -0700 - - [flang] Replace LLVM_ATTRIBUTE_UNUSED with [[maybe_unused]] (NFC) (#163916) - - This patch replaces LLVM_ATTRIBUTE_UNUSED with [[maybe_unused]], - introduced as part of C++17. - -commit e55071b157870d6e046e6bb315a449a2445c7e41 -Author: Valentin Clement (バレンタイン クレメン) -Date: Thu Oct 16 10:33:50 2025 -1000 - - [flang][cuda] Extent detection of data transfer with conversion (#163852) - -commit d55879de50024a29bc5533337f3cc7d8553d6cae -Author: Krzysztof Parzyszek -Date: Thu Oct 16 07:02:49 2025 -0500 - - [flang][OpenMP] Emit requirements in module files (#163449) - - For each program unit, collect the set of requirements from REQUIRES - directives in the source, and modules used by the program unit, and add - them to the details of the program unit symbol. - - The requirements in the symbol details as now stored as clauses. Since - requirements need to be emitted in the module files as OpenMP - directives, this makes the clause emission straightforward via - getOpenMPClauseName. - - Each program unit, including modules, the corresponding symbol will have - the transitive closure of the requirements for everything contained or - used in that program unit. 
- -commit 7eee67202378932d03331ad04e7d07ed4d988381 -Author: Kelvin Li -Date: Sat Oct 11 09:48:02 2025 -0400 - - [flang] Fix build breakage with FLANG_ENABLE_WERROR on (NFC) (#162894) - -commit de55329b6ed7184771fb036e52475a63ebc67c97 -Author: jeanPerier -Date: Fri Oct 10 11:19:54 2025 +0200 - - [flang][NFC] update createTempFromMold interface to return a bool (#162680) - - Some createTempFromMold users are looking for a compile time constant - for the `mustFree`. Instead of having them retrieving it, update the - interface to return a bool. The only users that needs a value was - `packageBufferizedExpr` and it has an overload that accept bool too. - - Tests are updated to reflect that this changes the place where the - boolean is created in BufferizeHLFIR, and just removes its creation in - contexts it is not needed. - -commit 6a02c0f2fa696b226e9d3c5250802cc045985876 -Author: Alexey Bataev -Date: Thu Oct 9 14:32:10 2025 -0400 - - [Flang]Fix propagation of loop collapse number for target-based directives (#162707) - -commit 5873d6a371655d93df8221c92a51030a4619f84f -Author: Susan Tan (ス-ザン タン) -Date: Thu Oct 9 12:50:24 2025 -0400 - - [flang][openacc] Add support for force clause for loop collapse (#162534) - - Currently the force clause `collapse (force:num_level)` is NYI. Added - support to sink any prologue and epilogue code to the inner most level - as specified. - -commit 121026b186687ea00e5f792ea84d43b09597088a -Author: jeanPerier -Date: Thu Oct 9 14:26:41 2025 +0200 - - [flang][openacc] map data operand results to symbols inside compute region (#162306) - - Variable references inside OpenACC compute and loop region were - currently always lowered to usages of the same SSA values than in the - host thread, even for variables that appear in data clauses and for - which acc data operations are created. 
- - This makes it a non-trivial task to identify implicit data usages vs - usage of data appearing in clauses because the SSA addresses used in the - region may have a non-trivial SSA relationship with the SSA addresses - used as inputs of the data operations, especially after CSE runs that - may merge component or array element addressing operations with similar - addressing on the host thread (fir.coordinate/hlfir.designate). - - This patch updates OpenACC lowering to remap the Symbol that appear in - data clauses to the related acc data operation result for the scope of - the compute or loop region. - - To allow FIR passes to reason about these addresses, a new hlfir.declare - operation is created with the acc data operation result. This gives - access to the shape, contiguity, attributes, and dummy argument - relationships inside the region without having FIR extended to - understand the data operations. - -commit 375f48942b9a3f3fbd82133390af25b6c96f1460 -Author: Michael Kruse -Date: Fri Oct 3 15:52:48 2025 +0200 - - [Flang] Add standalone tile support (#160298) - - Add support for the standalone OpenMP tile construct: - ```f90 - !$omp tile sizes(...) - DO i = 1, 100 - ... - ``` - - This is complementary to #143715 which added support for the tile - construct as part of another loop-associated construct such as - worksharing-loop, distribute, etc. - -commit c242aff2452fb662a7ea23954abe654b51182b8e -Author: Valentin Clement (バレンタイン クレメン) -Date: Thu Oct 2 04:43:45 2025 -1000 - - [flang][cuda][openacc] Create new symbol in host_data region for CUDA Fortran interop (#161613) - -commit 727aad15f0a897826fc9102b5a090b977c554097 -Author: Krzysztof Parzyszek -Date: Fri Sep 26 15:47:31 2025 -0500 - - [flang][OpenMP] Use OmpDirectiveSpecification in DECLARE_TARGET (#160573) - -commit d6e20c42c1f916fa925f0b1c2c37f3098ada1686 -Author: Slava Zakharin -Date: Fri Sep 26 09:40:04 2025 -0700 - - [flang] Clean-up for fir.do_loop generation in lowering. 
(#160630) - - This patch changes two things: - 1. We do not need to use the loop counter's last value - for regular do-loops in Lowering. - 2. The loop counter's increment is implied by fir.do_loop - operation, so there is no need to increment it explicitly. - - The last point has been especially confusing to me, because it was - unclear why we have an explicit increment if it is implied. - It looks like CFGConversion somehow still makes the final code - correct, i.e. the counter is not incremented twice. - Anyway, the new lowering should look more concise. - -commit 3ca59104cfe9c47ef64ce44491e7f0c1fbc9f788 -Author: Krzysztof Parzyszek -Date: Tue Sep 23 08:50:15 2025 -0500 - - [flang][OpenMP] Use OmpDirectiveSpecification in DECLARE_MAPPER (#160169) - -commit 8c189327e5573e597b3eead418beab6aaea72ca3 -Author: Krzysztof Parzyszek -Date: Fri Sep 19 10:50:23 2025 -0500 - - [flang][OpenMP] Use OmpDirectiveSpecification in SECTIONS (#159580) - -commit e75e28ad3c9558c2cca32cd16cd5681b5219ff8d -Author: Krzysztof Parzyszek -Date: Tue Sep 16 11:38:03 2025 -0500 - - [flang][OpenMP] Use OmpDirectiveSpecification in Omp[Begin|End]LoopDi… (#159087) - - …rective - - This makes accessing directive components, such as directive name or the - list of clauses simpler and more uniform across different directives. It - also makes the parser simpler, since it reuses existing parsing - functionality. - - The changes are scattered over a number of files, but they all share the - same nature: - - getting the begin/end directive from OpenMPLoopConstruct, - - getting the llvm::omp::Directive enum, and the source location, - - getting the clause list. - -commit 5365f8bc91d2d820092e904ecab21b841e3f5abb -Author: Akash Banerjee -Date: Mon Sep 15 16:11:55 2025 +0100 - - Revert "[NFC][Flang] Move bounds helper functions to Util header." 
(#158654) - - Reverts llvm/llvm-project#154164 - -commit 32ab6ff9f95739cba9954e666479d30e126af53c -Author: Akash Banerjee -Date: Mon Sep 15 15:45:49 2025 +0100 - - [NFC][Flang] Move bounds helper functions to Util header. (#154164) - - This PR moves the `needsBoundsOps` and `genBoundsOps` helper functions - to `flang/include/flang/Optimizer/OpenMP/Utils.h`. - -commit 5149e51cb25d6a68365ca3bd9300cff1b18213e2 -Author: Jean-Didier PAILLEUX -Date: Fri Sep 12 15:29:21 2025 +0200 - - [flang][Lower] Add lowering to SYNC ALL, SYNC MEMORY and SYNC IMAGES to PRIF (#154166) - - In relation to the approval and merge of the - https://github.com/llvm/llvm-project/pull/76088 specification about - multi-image features in Flang. - Here is a PR on adding support for SYNC ALL, SYNC MEMORY and SYNC IMAGES - in conformance with the PRIF specification. - - --------- - - Co-authored-by: Katherine Rasmussen - -commit d452e67ee7b5d17aa040f71d8997abc1a47750e4 -Author: Jan Leyonberg -Date: Wed Sep 10 09:25:40 2025 -0400 - - [flang][OpenMP] Enable tiling (#143715) - - This patch enables tiling in flang. In MLIR tiling is handled by - changing the the omp.loop_nest op to be able to represent both collapse - and tiling, so the flang front-end will combine the nested constructs into - a single MLIR op. The MLIR->LLVM-IR lowering of the LoopNestOp is - enhanced to first do the tiling if present, then collapse. - -commit 2a2296b1aab4614bf6c95c3003000832c9d43de5 -Author: Razvan Lupusoru -Date: Tue Sep 9 13:09:04 2025 -0700 - - [flang][acc] Fix incorrect loop body nesting and IV value use (#157708) - - Two issues are being resolved: - - Incorrect loop body nesting caused by insertion point not being - updated after the loop. The scenario is now being tested through - `nested_do_loops` function in the test. - - Incorrect IV ssa values due to incorrect handling of scoping. 
- - Additionally, this also adds `--openacc-do-loop-to-acc-loop` flag so - that the implicit conversion can be disabled for testing. - -commit c85e6ac74d7ccb36dfaaf94989f27c22cf5c7e7c -Author: Kareem Ergawy -Date: Mon Sep 8 14:33:00 2025 +0200 - - [NFC][flang][OpenMP] Extract target region utils to map or clone outside values (#155754) - - Following up on #154483, this PR introduces further refactoring to - extract some shared utils between OpenMP lowering and `do concurrent` - conversion pass. In particular, this PR extracts 2 utils that handle - mapping or cloning values used inside target regions but defined - outside. - - Later `do concurrent` PR(s) will also use these utils. - - PR stack: - - https://github.com/llvm/llvm-project/pull/155754 ◀️ - - https://github.com/llvm/llvm-project/pull/155987 - - https://github.com/llvm/llvm-project/pull/155992 - - https://github.com/llvm/llvm-project/pull/155993 - - https://github.com/llvm/llvm-project/pull/156589 - - https://github.com/llvm/llvm-project/pull/156610 - - https://github.com/llvm/llvm-project/pull/156837 - -commit 83da8d08ff110fd5bf3fff17043420ba442e300c -Author: Slava Zakharin -Date: Thu Sep 4 15:49:11 2025 -0700 - - [flang] Attach proper storage to [hl]fir.declare in lowering. (#155742) - - As described in - https://discourse.llvm.org/t/rfc-flang-representation-for-objects-inside-physical-storage/88026, - `[hl]fir.declare` should carry information about the layout - of COMMON/EQUIVALENCE variables within the physical storage. - - This patch modifes Flang lowering to attach this information. - -commit 88b71e20488ae0987b7ec7cfa9d49d9358b1f38c -Author: Krzysztof Parzyszek -Date: Fri Aug 29 07:37:48 2025 -0500 - - [flang][OpenMP] Replace OpenMPBlockConstruct with OmpBlockConstruct (#155872) - - OpenMPBlockConstruct, somewhat confusingly, represents most but not all - block-associated constructs. It's derived from OmpBlockConstruct, as are - all the remaining block-associated constructs. 
- - It does not correspond to any well-defined group of constructs. It's the - collection of constructs that don't have their own types (and those that - do have their own types do so for their own reasons). - - Using the broader OmpBlockConstruct in type-based visitors won't cause - issues, because the specific overloads (for classes derived from it) - will always be preferred. - -commit 9cf8752ccfd194c3fa1cda641db2e3c77aa4915c -Author: Kajetan Puchalski -Date: Thu Aug 28 15:58:29 2025 +0100 - - [flang][OpenMP] Handle symbols on composite simd with multiple privatizers (#155640) - - In some cases, a clause on a composite simd construct applied to simd - can be using a symbol that is also used by another privatizer, not - applied to simd. Correctly handle this scenario by checking which - directive the privatizer is being generated for while determining - whether to emit the copy region. - - Fixes #155195. - - Signed-off-by: Kajetan Puchalski - -commit 86e4c175e3de73bf529b6849614fefcb4b8d7011 -Author: Kareem Ergawy -Date: Wed Aug 27 18:25:36 2025 +0200 - - [NFC][flang][OpenMP] Create `FortranUtils` lib and move `createMapInfoOp` to it (#154483) - -commit 044e1aabbd4b92a2e05a52e9a1630c2fe548d358 -Author: Tom Eccles -Date: Tue Aug 26 11:45:56 2025 +0100 - - [flang][OpenMP] move omp end sections validation to semantics (#154740) - - See #90452. The old parse tree errors exploded to thousands of unhelpful - lines when there were multiple missing end directives. - - Instead, allow a missing end directive in the parse tree then validate - that it is present during semantics (where the error messages are a lot - easier to control). - -commit 21019a3c11b56776809ec65af35b050f58570b77 -Author: Chaitanya -Date: Tue Aug 26 09:30:21 2025 +0530 - - [flang][openmp] Add Lowering to omp mlir for workdistribute construct (#154378) - - This PR adds lowering of workdistribute construct in flang to omp mlir dialect workdistribute op. 
- - The work in this PR is c-p and updated from @ivanradanov commits from coexecute implementation: - flang_workdistribute_iwomp_2024 - -commit 8a5b6b302eb55e514f086ffcee3cf4f81750bb5a -Author: Kazu Hirata -Date: Wed Aug 20 16:30:24 2025 -0700 - - [flang] Use SmallPtrSet directly instead of SmallSet (NFC) (#154471) - - I'm trying to remove the redirection in SmallSet.h: - - template - class SmallSet : public SmallPtrSet - {}; - - to make it clear that we are using SmallPtrSet. There are only - handful places that rely on this redirection. - - This patch replaces SmallSet to SmallPtrSet where the element type is - a pointer. - -commit 42350f428db0d053610a82a747eb240afc4d9250 -Author: Krzysztof Parzyszek -Date: Tue Aug 19 08:32:43 2025 -0500 - - [flang][OpenMP] Parse GROUPPRIVATE directive (#153807) - - No semantic checks or lowering yet. - -commit 0e93dbc6b1cac9c69c546cff7b5dd5935917ae9e -Author: Kareem Ergawy -Date: Tue Aug 19 12:07:17 2025 +0200 - - [flang] `do concurrent`: Enable delayed localization by default (#154303) - - Enables delayed localization by default for `do concurrent`. Tested both - gfortran and Fujitsu test suites. - - All tests pass for gfortran tests. For Fujitsu, enabled delayed - localization passes more tests: - - Delayed localization disabled: - Testing Time: 7251.76s - Passed : 88520 - Failed : 162 - Executable Missing: 408 - - Delayed localization enabled: - Testing Time: 7216.73s - Passed : 88522 - Failed : 160 - Executable Missing: 408 - -commit acdbb00af5d0b6469fceb8abb26634de2dbee985 -Author: Jean-Didier PAILLEUX -Date: Sat Aug 16 01:04:49 2025 +0200 - - [flang] Adding support of -fcoarray flang and init PRIF (#151675) - - In relation to the approval and merge of the - [PRIF](https://github.com/llvm/llvm-project/pull/76088) specification - about multi-image features in Flang, here is a first PR to add support - for the `-fcoarray` compilation flag and the initialization of the PRIF - environment. 
- Other PRs will follow for adding support of lowering to PRIF. - -commit b9e33fd49386a4be569e7d579c24e0e2a9607943 -Author: Kareem Ergawy -Date: Fri Aug 15 08:45:02 2025 +0200 - - [flang] Do not re-localize loop ivs when nested inside `block`s (#153350) - - Consider the following example: - ```fortran - implicit none - integer :: i, j - - do concurrent (i=1:10) local(j) - block - do j=1,20 - end do - end block - end do - ``` - - Without the fix introduced in this PR, the compiler would "re-localize" - the `j` variable inside the `fir.do_concurrent` loop: - ```mlir - fir.do_concurrent { - %7 = fir.alloca i32 {bindc_name = "j"} - %8:2 = hlfir.declare %7 {uniq_name = "_QFloop_in_nested_blockEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) - ... - fir.do_concurrent.loop (%arg0) = (%5) to (%6) step (%c1) local(@_QFloop_in_nested_blockEj_private_i32 %4#0 -> %arg1 : !fir.ref) { - %12:2 = hlfir.declare %arg1 {uniq_name = "_QFloop_in_nested_blockEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) - ... - %17:2 = fir.do_loop %arg2 = %14 to %15 step %c1_1 iter_args(%arg3 = %16) -> (index, i32) { - fir.store %arg3 to %8#0 : !fir.ref - ... - } - } - } - ``` - - This happened because we did a shallow look-up of `j` and since the loop - is nested inside a `block`, the look-up failed and we re-created a local - allocation for `j` inside the parent `fir.do_concurrent` loop. This - means that we ended up not using the actual localized symbol which is - passed as a region argument to the `fir.do_concurrent.loop` op. - - In case of `j`, we do not need to do a shallow look-up. The shallow - look-up is only needed if a symbol is an OpenMP private one or an - iteration variable of a `do concurrent` loop. Neither of which applies - to `j`. - - With the fix, `j` is properly resolved to the `local` region argument: - ```mlir - fir.do_concurrent { - ... - fir.do_concurrent.loop (%arg0) = (%5) to (%6) step (%c1) local(@_QFloop_in_nested_blockEj_private_i32 %4#0 -> %arg1 : !fir.ref) { - ... 
- %10:2 = hlfir.declare %arg1 {uniq_name = "_QFloop_in_nested_blockEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) - ... - %15:2 = fir.do_loop %arg2 = %12 to %13 step %c1_1 iter_args(%arg3 = %14) -> (index, i32) { - fir.store %arg3 to %10#0 : !fir.ref - ... - } - } - } - ``` - -commit d3d96e20573771c9b0f54a07c1557c448b1d9ae1 -Author: Kajetan Puchalski -Date: Thu Aug 14 14:20:15 2025 +0100 - - [flang][OpenMP] Add -f[no]-openmp-simd (#150269) - - Both clang and gfortran support the -fopenmp-simd flag, which enables - OpenMP support only for simd constructs, while disabling the rest of - OpenMP. - - Implement the appropriate parse tree rewriting to remove non-SIMD OpenMP - constructs at the parsing stage. - - Add a new SimdOnly flang OpenMP IR pass which rewrites generated OpenMP - FIR to handle untangling composite simd constructs, and clean up OpenMP - operations leftover after the parse tree rewriting stage. - With this approach, the two parts of the logic required to make the flag - work can be self-contained within the parse tree rewriter and the MLIR - pass, respectively. It does not need to be implemented within the core - lowering logic itself. - - The flag is expected to have no effect if -fopenmp is passed explicitly, - and is only expected to remove OpenMP constructs, not things like OpenMP - library functions calls. This matches the behaviour of other compilers. - - --------- - - Signed-off-by: Kajetan Puchalski - -commit a2899c457ecac9f2511fa08926bcf1c22eee1d14 -Author: Valentin Clement (バレンタイン クレメン) -Date: Wed Aug 13 10:55:15 2025 -0700 - - [flang][cuda] Support data transfer with conversion (#153242) - - When the rhs of the data transfer is from a different type, allocate a - new temp on the host and first transfer the rhs to it. Then, use the - elemental op created to do the conversion. 
- -commit 3b10b9a2b03a2954d9da54c0f1137daeb9e339c4 -Author: Akash Banerjee -Date: Mon Aug 11 12:45:22 2025 +0100 - - [MLIR][OpenMP] Add lowering support for AUTOMAP modifier (#151513) - - Add Automap modifier to the MLIR op definition for the DeclareTarget - directive's Enter clause. Also add lowering support in Flang. - - Automap Ref: OpenMP 6.0 section 7.9.7. - -commit d7d0d7a80fc343750bbf85ea8c184737d9c70f62 -Author: Kareem Ergawy -Date: Fri Aug 8 06:22:53 2025 +0200 - - [flang] Skip processing reductions for unstructured `do concurrent` loops (#150188) - - Fixes #149563 - - When emitting unstructured `do concurrent` loops, reduction processing - should be skipped since we are not emitting `fir.do_concurrent` loop in - the first place. - -commit e368b5343d037c89051097c2a87a6fb76548014e -Author: Krzysztof Parzyszek -Date: Thu Aug 7 08:10:25 2025 -0500 - - [flang][OpenMP] Make OpenMPCriticalConstruct follow block structure (#152007) - - This allows not having the END CRITICAL directive in certain situations. - Update semantic checks and symbol resolution. - -commit eb0ddba26b6a265b44b442ae666db43b9f28b26a -Author: Valentin Clement (バレンタイン クレメン) -Date: Wed Aug 6 21:49:55 2025 -0700 - - Reland "[flang][cuda] Set the allocator of derived type component after allocation" (#152418) - - Reviewed in #152379 - - Move the allocator index set up after the allocate statement otherwise - the derived type descriptor is not allocated. - - Support array of derived-type with device component - -commit 2696e8c1499682f0b1f357d9035ed59f544892f8 -Author: Valentin Clement (バレンタイン クレメン) -Date: Wed Aug 6 18:49:52 2025 -0700 - - [flang][cuda] Remove too restrictive assert for data transfer (#152398) - - When the rhs is a an array element, the assert was triggered but this is - still a valid transfer. Remove the assert. The operation has a verifier - to check its validity. 
- -commit 7d3134f6cc59f47460646a13abcf824bae05d772 -Author: Valentin Clement (バレンタイン クレメン) -Date: Wed Aug 6 15:55:53 2025 -0700 - - Revert "[flang][cuda] Set the allocator of derived type component after allocation" (#152402) - - Reverts llvm/llvm-project#152379 - - Buildbot failure - https://lab.llvm.org/buildbot/#/builders/207/builds/4905 - -commit d897355876287e410d35f1f0ac74d79955d50dd4 -Author: Valentin Clement (バレンタイン クレメン) -Date: Wed Aug 6 15:14:00 2025 -0700 - - [flang][cuda] Set the allocator of derived type component after allocation (#152379) - - - Move the allocator index set up after the allocate statement otherwise - the derived type descriptor is not allocated. - - Support array of derived-type with device component - -commit 3847620ba9a22a13de30bd77d059aae6f484dd94 -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue Aug 5 07:27:43 2025 -0700 - - [flang][NFC] Move the rest of ops creation to new APIs (#152079) - -commit 47ef3d069bcfb8ec31c06cdd619557c84d1084ad -Author: Carlos Seo -Date: Tue Aug 5 10:53:18 2025 -0300 - - [Flang] Avoid crash when a function return is undefined (#151577) - - Properly terminate the StatementContext cleanup when a function return - value is undefined. - - Fixes #126452 - -commit cc2a385e65483688d3e4a0091e0767960f9eb8c2 -Author: agozillon -Date: Tue Aug 5 15:48:37 2025 +0200 - - [Flang][OpenMP] Make implicitly captured scalars fully firstprivatized (#147442) - - Currently, we indicate to the runtime that implicit scalar captures are - firstprivate (via map and - capture types), enough for the runtime trace to treat it as such, but we - do not CodeGen the IR - in such a way that we can take full advantage of this aspect of the - OpenMP specification. 
- - This patch seeks to change that by applying the correct symbol flags - (firstprivate/implicit) to the - implicitly captured scalars within target regions, which then triggers - the delayed privitization code - generation for these symbols, bringing the code generation in-line with - the explicit firstpriviate - clause. Currently, similarly to the delayed privitization I have - sheltered this segment of code - behind the EnabledDelayedPrivitization flag, as without it, we'll - trigger an compiler error for - firstprivate not being supported any time we implicitly capture a scalar - and try to firstprivitize - it, in future when this flag is removed it can also be removed here. So, - for now, you need to - enable this via providing the compiler the flag on compilation of any - programs. - -commit e4d3dc6359f568a9b0ac2e1010bbc7d13f4982b6 -Author: Valentin Clement (バレンタイン クレメン) -Date: Mon Aug 4 22:09:08 2025 -0700 - - [flang][NFC] Update HLFIR ops creation to the new APIs (#152075) - - See #147168 - -commit 3b23fdb35def583ae5db58576a7fcb312315879e -Author: Valentin Clement (バレンタイン クレメン) -Date: Mon Aug 4 17:53:44 2025 -0700 - - [flang][NFC] Update more FIR op creation to the new APIs (#152060) - -commit 05b52ef909475f4048e5b8cd86b3671772506682 -Author: Valentin Clement (バレンタイン クレメン) -Date: Mon Aug 4 16:09:24 2025 -0700 - - [flang][cuda][NFC] Update to the new create APIs (#152050) - - Some operation creations were updated in flang directory but not all. - Migrate the CUF ops to the new create APIs introduce in #147168 - -commit 8cc4c6d78f08ca38b5051a09a71ec14ae8931cda -Author: Tom Eccles -Date: Mon Aug 4 12:01:27 2025 +0100 - - [flang][Lower] Make reduction processing failure a hard error (#150233) - - See #150178 - - This may regress some test cases which only ever passed by accident. - - I've tested SPEC2017 and a sample of applications to check that this - doesn't break anything too obvious. 
Presumably this was not a widely - used feature or we would have noticed the bug sooner. - - I'm unsure whether this should be backported to LLVM 21 or not: I think - it is much better to refuse to compile than to silently produce the - wrong result, but there is a chance this could regress something which - previously worked by accident. Opinions welcome. - -commit 2f33b01651b1041682bab363e556ff1a396262fc -Author: Razvan Lupusoru -Date: Fri Aug 1 09:27:36 2025 -0700 - - [flang] Ensure lowering diagnostic handler does not outlive lowering (#151608) - - When the LoweringBridge is created, it registers an MLIR Diagnostics - handler with the MLIRContext. However, it never deregisters it once - lowering is finished. - - This fixes this particular scenario. It also makes it so that the - Diagnostics handler is optional. - -commit 6533ad04edcbc02d012cdb181d8745ca0d2f2e75 -Author: Krzysztof Parzyszek -Date: Fri Aug 1 07:52:59 2025 -0500 - - [flang][OpenMP] Make all block constructs share the same structure (#150956) - - The structure is - - OmpBeginDirective (aka OmpDirectiveSpecification) - - Block - - optional (aka optional) - - The OmpBeginDirective and OmpEndDirective are effectively different - names for OmpDirectiveSpecification. They exist to allow the semantic - analyses to distinguish between the beginning and the ending of a block - construct without maintaining additional context. - - The actual changes are in the parser: parse-tree.h and openmp-parser.cpp - in particular. The rest is simply changing the way the directive/clause - information is accessed (typically for the simpler). - - All standalone and block constructs now use OmpDirectiveSpecification to - store the directive/clause information. 
- -commit a361cde4421540e7ba3d6cdae0ef6e2860e126d8 -Author: Kajetan Puchalski -Date: Fri Aug 1 13:12:57 2025 +0100 - - [flang][OpenMP] Support delayed privatisation for composite distribute simd (#151169) - - Implement the lowering for delayed privatisation for composite - "distibute simd"constructs. Fixes new crashes previously masked by simd - information on composite constructs being ignored. - - Signed-off-by: Kajetan Puchalski - -commit c0591477ac99bf8ae51ce116a6471420f128ac9f -Author: Kajetan Puchalski -Date: Fri Aug 1 13:12:21 2025 +0100 - - [flang][OpenMP] Support delayed privatisation for composite do simd (#150979) - - Implement the lowering for delayed privatisation for composite "do simd" - constructs. Fixes new crashes previously masked by simd information on - composite constructs being ignored, such as llvm#150975. - - Signed-off-by: Kajetan Puchalski - -commit 698492290540c9a75c1b8acc75f6c0fd4e468f5b -Author: Krzysztof Parzyszek -Date: Thu Jul 31 07:51:22 2025 -0500 - - [flang][OpenMP] Store directive information in OpenMPSectionConstruct (#150804) - - The OpenMPSectionConstruct corresponds to the `!$omp section` directive, - but there is nothing in the AST node that stores the directive - information. Even though the only possibility (at the moment) is - "section" without any clauses, for improved generality it is helpful to - have that information anyway. - -commit 27f777e9c06daeb03efad9230fe080df2a3a94c5 -Author: Michael Kruse -Date: Wed Jul 30 09:20:42 2025 +0200 - - [Flang][OpenMP] Skip DSA for canonical loops (#150593) - - OpenMP loop transformations to not have data-sharing attributes and do - not explicitly privatize the loop variable. The DataSharingProcessor was - still used in #144785 because `createAndSetPrivatizedLoopVar` expected - it. - - We skip that function and directly write to the loop variable. If the - loop variable is implicitly or explicitly privatized, it will be due to - surrounding OpenMP constructs such as `parallel`. 
- -commit 4128cf3b26cff997f1f315ee571cbc7110bc250c -Author: Razvan Lupusoru -Date: Tue Jul 29 10:03:22 2025 -0700 - - [flang][acc] Lower do and do concurrent loops specially in acc regions (#149614) - - When OpenACC is enabled and Fortran loops are annotated with `acc loop`, - they are lowered to `acc.loop` operation. And rest of the contained - loops use the normal FIR lowering path. - - However, the OpenACC specification has special provisions related to - contained loops and their induction variable. In order to adhere to - this, we convert all valid contained loops to `acc.loop` in order to - store this information appropriately. - - The provisions in the spec that motivated this change (line numbers are - from OpenACC 3.4): - - 1353 Loop variables in Fortran do statements within a compute - construct are predetermined to be private to the thread that executes - the loop. - - 3783 When do concurrent appears without a loop construct in a kernels - construct it is treated as if it is annotated with loop auto. If it - appears in a parallel construct or an accelerator routine then it is - treated as if it is annotated with loop independent. - - By valid loops - we convert do loops and do concurrent loops which have - induction variable. Loops which are unstructured are not handled. - -commit 9d642b0ec806d13002e2f0b50091ca9656b238e5 -Author: Anchu Rajendran S -Date: Mon Jul 28 05:46:10 2025 -0700 - - [flang][MLIR][OpenMP][llvm]Atomic Control Support (#150860) - -commit efe1aa8904ea3ad8b19ab2aa5660e27a08c7d694 -Author: Kiran Chandramohan -Date: Thu Jul 24 21:54:26 2025 +0100 - - Revert "[flang][flang-driver][mlir][OpenMP] atomic control support" (#150504) - - Reverts llvm/llvm-project#143441 - - Reverting due to CI failure - https://lab.llvm.org/buildbot/#/builders/53/builds/18055.
- -commit f44346dc1f6252716cfc62bb0687e3932a93089f -Author: Anchu Rajendran S -Date: Thu Jul 24 09:49:38 2025 -0700 - - [flang][flang-driver][mlir][OpenMP] atomic control support (#143441) - - Atomic Control Options are used to specify architectural characteristics - to help lowering of atomic operations. The options used are: - `-f[no-]atomic-remote-memory`, `-f[no-]atomic-fine-grained-memory`, - `-f[no-]atomic-ignore-denormal-mode`. - Legacy option `-m[no-]unsafe-fp-atomics` is aliased to - `-f[no-]ignore-denormal-mode`. - More details can be found in - https://github.com/llvm/llvm-project/pull/102569. This PR implements the - frontend support for these options with OpenMP atomic in flang. - - Backend changes are available in the draft PR: - https://github.com/llvm/llvm-project/pull/143769 which will be raised - after this merged. - -commit 1ba3859cdbf263182502b1c00546e985bdb633da -Author: Krzysztof Parzyszek -Date: Thu Jul 24 08:59:13 2025 -0500 - - [flang][OpenMP] Parse strictly- and loosely-structured blocks (#150298) - - Block-associated constructs have, as their body, either a strictly- or a - loosely-structured block. In the former case the end-directive is - optional. - - The existing parser required the end-directive to be present in all - cases. - - Note: - The definitions of these blocks in the OpenMP spec exclude cases where - the block contains more than one construct, and the first one is - BLOCK/ENDBLOCK. For example, the following is invalid: - ``` - !$omp target - block ! This cannot be a strictly-structured block, but - continue ! a loosely-structured block cannot start with - endblock ! BLOCK/ENDBLOCK - continue ! 
- !$omp end target - ``` - -commit 97faab7bc279516a31001621203f4ff5a158ed13 -Author: Kazu Hirata -Date: Wed Jul 23 08:33:32 2025 -0700 - - [flang] Fix a warning - - This patch fixes: - - flang/lib/Lower/Bridge.cpp:2128:10: error: unused variable 'result' - [-Werror,-Wunused-variable] - -commit fc0a978327215aa8883ae6f18d1e316f3c04520a -Author: Carlos Seo -Date: Wed Jul 23 11:16:11 2025 -0300 - - [Flang] Fix ASSIGN statement (#149941) - - Handle the case where the assigned variable also has a pointer - attribute. - - Fixes #121721 - -commit 43db6c5cc1a81b540ddca49bee197895c420ec2d -Author: Krzysztof Parzyszek -Date: Wed Jul 23 08:25:33 2025 -0500 - - [flang][OpenMP] General utility to get directive id from AST node (#150121) - - Fortran::parser::omp::GetOmpDirectiveName(t) will get the - OmpDirectiveName object that corresponds to construct t. That object (an - AST node) contains the enum id and the source information of the - directive. - - Replace uses of extractOmpDirective and getOpenMPDirectiveEnum with the - new function. - -commit 36c37b019b5daae79785e8558d693e6ec42b0ebd -Author: Kareem Ergawy -Date: Wed Jul 23 11:23:00 2025 +0200 - - [flang][OpenMP] Restore reduction processor behavior broken by #145837 (#150178) - - Fixes #149089 and #149700. - - Before #145837, when processing a reduction symbol not yet supported by - OpenMP lowering, the reduction processor would simply skip filling in - the reduction symbols and variables. With #145837, this behavior was - slightly changed because the reduction symbols are populated before - invoking the reduction processor (this is more convenient to share the - code with `do concurrent`). - - This PR restores the previous behavior.
- -commit 0586067cf07bef0f04fd1dc7135a9b773ebaa07a -Author: Michael Kruse -Date: Wed Jul 23 10:18:13 2025 +0200 - - [Flang] Build fix without precompiled headers - - The header semantics.h is added implicitly in the precompiled headers, but - the build was failing when precompiled headers are disabled (e.g. - using CMAKE_DISABLE_PRECOMPILE_HEADERS=ON): - - ``` - ../_src/flang/lib/Semantics/canonicalize-omp.cpp: In constructor ‘Fortran::semantics::CanonicalizationOfOmp::CanonicalizationOfOmp(Fortran::semantics::SemanticsContext&)’: - ../_src/flang/lib/Semantics/canonicalize-omp.cpp:31:38: error: invalid use of incomplete type ‘class Fortran::semantics::SemanticsContext’ - 31 | : context_{context}, messages_{context.messages()} {} - | ^~~~~~~ - In file included from ../_src/flang/lib/Semantics/canonicalize-omp.cpp:9: - ../_src/flang/lib/Semantics/canonicalize-omp.h:17:7: note: forward declaration of ‘class Fortran::semantics::SemanticsContext’ - 17 | class SemanticsContext; - | ^~~~~~~~~~~~~~~~ - compilation terminated due to -fmax-errors=1. - ``` - -commit 2914a488c7f49c4817bbfb86f74da04fd338b4eb -Author: Krzysztof Parzyszek -Date: Tue Jul 22 07:37:47 2025 -0500 - - [flang][OpenMP] Sema checks, lowering with new format of MAP modifiers (#149137) - - OpenMP 6.0 has changed the modifiers on the MAP clause. Previous patch - has introduced parsing support for them. This patch introduces - processing of the new forms in semantic checks and in lowering. This - only applies to existing modifiers, which were updated in the 6.0 spec. - Any of the newly introduced modifiers (SELF and REF) are ignored. - -commit b487f9a7bd15e453a3ff7fcbfbc54e54eecf26d3 -Author: Michael Kruse -Date: Tue Jul 22 11:39:01 2025 +0200 - - [Flang] Implement !$omp unroll using omp.unroll_heuristic (#144785) - - Add support for `!$omp unroll` in Flang and basic MLIR - `omp.canonical_loop` modeling. 
- - First step to add `omp.canonical_loop` modeling to the MLIR OpenMP - dialect with the goal of being more general than the current - `omp.loop_nest` approach: - * Support for non-perfectly nested loops - * Support for non-rectangular loops - * Support for arbitrary compositions of loop transformations - - This patch is functional end-to-end and adds support for `!$omp unroll` - to Flang. `!$omp unroll` is lowered to `omp.new_cli`, - `omp.canonical_loop`, and `omp.unroll_heuristic` in MLIR, which are - lowered to LLVM-IR using the OpenMPIRBuilder - (https://reviews.llvm.org/D107764). - -commit a3a007ad5fa20abc90ead4e1030b481bf109b4cf -Author: Maksim Levental -Date: Mon Jul 21 18:54:29 2025 -0500 - - [mlir][NFC] update `flang/Lower` create APIs (8/n) (#149912) - - See https://github.com/llvm/llvm-project/pull/147168 for more info. - -commit 2aa1e54fa1ff7f7c347e7108fe8650e94014c941 -Author: Krzysztof Parzyszek -Date: Mon Jul 21 10:55:37 2025 -0500 - - [flang][OpenMP] Parse OpenMP 6.0 map modifiers (#149134) - - OpenMP 6.0 has changed the modifiers on the MAP clause: - - map-type-modifier has been split into individual modifiers, - - map-type "delete" has become a modifier, - - new modifiers have been added. - - This patch adds parsing support for all of the OpenMP 6.0 modifiers. The - old "map-type-modifier" is retained, but is no longer created in - parsing. It will remain to take advantage of the preexisting modifier - validation for older versions: when the OpenMP version is < 6.0, the - modifiers will be rewritten back as map-type-modifiers (or map- type in - case of "delete"). - - In this patch the modifiers will always be rewritten in the older format - to isolate these changes to parsing as much as possible. 
- -commit 9e5b2fbe86ed9b303eff779fff012d6a96574f3d -Author: Peter Klausler -Date: Fri Jul 18 13:45:05 2025 -0700 - - [flang][runtime] Preserve type when remapping monomorphic pointers (#149427) - - Pointer remappings unconditionally update the element byte size and - derived type of the pointer's descriptor. This is okay when the pointer - is polymorphic, but not when a pointer is associated with an extended - type. - - To communicate this monomorphic case to the runtime, add a new entry - point so as to not break forward binary compatibility. - -commit 151fffccf1340d8a2800664cbcaaa579ba772a4c -Author: Kazu Hirata -Date: Fri Jul 18 08:05:12 2025 -0700 - - [flang] Migrate away from ArrayRef(std::nullopt_t) (#149454) - - ArrayRef(std::nullopt_t) has been deprecated. This patch replaces - std::nullopt with mlir::TypeRange{} or mlir::ValueRange{} as - appropriate. - -commit 2a7328dacae39e87ca4cc7548b9abcdba60b946b -Author: Kazu Hirata -Date: Thu Jul 17 15:23:55 2025 -0700 - - [flang] Migrate away from ArrayRef(std::nullopt_t) (#149337) - - ArrayRef(std::nullopt_t) has been deprecated. This patch replaces - std::nullopt with {}. - - A subsequent patch will address those places where we need to replace - std::nullopt with mlir::TypeRange{} or mlir::ValueRange{}. - -commit ff5784bb9094f6035851dc7abc4a5760fdc21e45 -Author: Krzysztof Parzyszek -Date: Thu Jul 17 12:11:12 2025 -0500 - - [flang][OpenMP] Move extractOmpDirective to Utils.cpp, NFC (#148653) - -commit 7c8a197918a0c4044c1be39a26d517eea95a5ec9 -Author: Kareem Ergawy -Date: Fri Jul 11 07:42:51 2025 +0200 - - [NFC][flang] Move `ReductionProcessor` to `Lower/Support`. (#146025) - - With #145837, the `ReductionProcessor` component is now used by both - OpenMP and `do concurrent`. Therefore, this PR moves it to a shared - location: `flang/Lower/Support`. 
- - PR stack: - - https://github.com/llvm/llvm-project/pull/145837 - - https://github.com/llvm/llvm-project/pull/146025 (this one) - - https://github.com/llvm/llvm-project/pull/146028 - - https://github.com/llvm/llvm-project/pull/146033 - -commit eba35cc1c0e4e2c59f9fd1f7a6f3b17cb4d8c765 -Author: Kareem Ergawy -Date: Fri Jul 11 06:39:30 2025 +0200 - - [flang][do concurrent] Re-model `reduce` to match how reductions are modelled in OpenMP and OpenACC (#145837) - - This PR proposes re-modelling `reduce` specifiers to match OpenMP and - OpenACC. In particular, this PR includes the following: - - * A new `fir` op: `fir.declare_reduction` which is identical to OpenMP's - `omp.declare_reduction` op. - * Updating the `reduce` clause on `fir.do_concurrent.loop` to use the - new op. - * Re-uses the `ReductionProcessor` component to emit reductions for `do - concurrent` just like we do for OpenMP. To do this, the - `ReductionProcessor` had to be refactored to be more generalized. - * Updates mapping `do concurrent` to `fir.loop ... unordered` nests using - the new reduction model. - - Unfortunately, this is a big PR that would be difficult to divide up in - smaller parts because the bottom of the changes are the `fir` table-gen - changes to `do concurrent`. However, doing these MLIR changes cascades - to the other parts that have to be modified to not break things. - - This PR goes in the same direction we went for `private/local` - speicifiers. Now the `do concurrent` and OpenMP (and OpenACC) dialects - are modelled in essentially the same way which makes mapping between - them more trivial, hopefully. 
- - PR stack: - - https://github.com/llvm/llvm-project/pull/145837 (this one) - - https://github.com/llvm/llvm-project/pull/146025 - - https://github.com/llvm/llvm-project/pull/146028 - - https://github.com/llvm/llvm-project/pull/146033 - -commit c919221bbe56fae15b509fcc84c25b0c041eb6b5 -Author: Valentin Clement (バレンタイン クレメン) -Date: Thu Jul 10 20:52:55 2025 -0700 - - [flang][cuda][NFC] Remove TODO implemented in semantic (#148058) - -commit 9a0e03f430dec4634086fe8315c4c3b730bd7c66 -Author: Valentin Clement (バレンタイン クレメン) -Date: Thu Jul 10 09:50:31 2025 -0700 - - [flang][cuda] Update implicit data transfer for device component (#147882) - - Update the detection of implicit data transfer when a device resident - allocatable derived-type component is involved and remove the TODOs. - -commit c4138a24dc254783697f512f053e794fb1c68f88 -Author: Vijay Kandiah -Date: Wed Jul 9 15:47:11 2025 -0500 - - [mlir][acc][flang] Lower nested ACC loops with tile clause as collapsed loops (#147801) - - In the case of nested loops, `acc.loop` is meant to subsume all of the - loops that it applies to (when explicitly described as doing so in the - OpenACC specification). So when there is a `acc loop tile(...)` present - on nested Fortran DO loops, `acc.loop` should apply to the `n` loops - that `tile` applies to. This change lowers such nested Fortran loops - with tile clause into a collapsed `acc.loop` with `n` IVs, loop bounds, - and step, in a similar fashion to the current lowering for acc loops - with `collapse` clause. - -commit c9900015a9a0bc2ccadae5e24b63ddbfe4d508fd -Author: Shunsuke Watanabe -Date: Wed Jul 9 13:43:54 2025 +0900 - - [flang] Add -fcomplex-arithmetic= option and select complex division algorithm (#146641) - - This patch adds an option to select the method for computing complex - number division. 
It uses `LoweringOptions` to determine whether to lower - complex division to a runtime function call or to MLIR's `complex.div`, - and `CodeGenOptions` to select the computation algorithm for - `complex.div`. The available option values and their corresponding - algorithms are as follows: - - `full`: Lower to a runtime function call. (Default behavior) - - `improved`: Lower to `complex.div` and expand to Smith's algorithm. - - `basic`: Lower to `complex.div` and expand to the algebraic algorithm. - - See also the discussion in the following discourse post: - https://discourse.llvm.org/t/optimization-of-complex-number-division/83468 - - --------- - - Co-authored-by: Tarun Prabhu - -commit 46caad52ac14cefd6f9cf3188863818e330f3844 -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue Jul 8 10:52:15 2025 -0700 - - [flang][cuda] Do not produce data transfer in offloaded do concurrent (#147435) - - If a `do concurrent` loop is offloaded then there should be no CUDA data - transfer in it. Update the semantic and lowering to take that into - account. - - `AssignmentChecker` has to be put into a separate pass because the - checkers in `SemanticsVisitor` cannot have the same `Enter/Leave` - functions. The `DoForallChecker` already has `Enter/Leave` functions - for the `DoConstruct`. - -commit 9a8d45f6268112dce4950cd2f21628963546082f -Author: Jack Styles -Date: Tue Jul 8 18:28:58 2025 +0100 - - [Flang][OpenMP] Fix crash when block.end() is missed (#147519) - - As reported in #145917 and #147309, there are situations where flang - may crash. This is because `nextIt` in - `RewriteOpenMPLoopConstruct` gets re-assigned when an iterator is erased - from the block. If this is missed, Flang may attempt to access a - location in memory that is not accessible and cause a compiler crash. - - This adds protection where the crash can occur, and a test with a - reproducer that can trigger the crash. 
- - Fixes #147309 - -commit 65cb0eae58d2b668869f3e8f10cb79eb2b8c55ac -Author: Jack Styles -Date: Tue Jul 1 08:39:15 2025 +0100 - - [Flang][OpenMP] Add Semantics support for Nested OpenMPLoopConstructs (#145917) - - In OpenMP Version 5.1, the tile and unroll directives were added. When - using these directives, it is possible to nest them within other OpenMP - Loop Constructs. This patch enables the semantics to allow for this - behaviour on these specific directives. Any nested loops will be stored - within the initial Loop Construct until reaching the DoConstruct itself. - - Relevant tests have been added, and previous behaviour has been retained - with no changes. - - See also, #110008 - -commit faefe7cf7daf585a781af151726d31981cee9e4f -Author: jeanPerier -Date: Mon Jun 30 09:58:00 2025 +0200 - - [flang] add option to generate runtime type info as external (#146071) - - Reland #145901 with a fix for shared library builds. - - So far flang generates runtime derived type info global definitions (as - opposed to declarations) for all the types used in the current - compilation unit even when the derived types are defined in other - compilation units. It is using linkonce_odr to achieve derived type - descriptor address "uniqueness" aspect needed to match two derived type - inside the runtime. - - This comes at a big compile time cost because of all the extra globals - and their definitions in apps with many and complex derived types. - - This patch adds and experimental option to only generate the rtti - definition for the types defined in the current compilation unit and to - only generate external declaration for the derived type descriptor - object of types defined elsewhere. - - Note that objects compiled with this option are not compatible with - object files compiled without because files compiled without it may drop - the rtti for type they defined if it is not used in the compilation unit - because of the linkonce_odr aspect. 
- - I am adding the option so that we can better measure the extra cost of - the current approach on apps and allow speeding up some compilation - where devirtualization does not matter (and the build config links to - all module file object anyway). - -commit 344b5b7f9e5bb5c48ee3e9e380706038eaa89044 -Author: Krzysztof Parzyszek -Date: Sat Jun 28 13:38:00 2025 -0500 - - [flang][OpenMP] Move lowering of ATOMIC to separate file, NFC (#146225) - - Reinstate commits e5559ca4 and 925dbc79. Fix the issues with compilation - hangs by including DenseMapInfo specialization where the corresponding - instance of DenseMap was defined. - - Ref: https://github.com/llvm/llvm-project/pull/144960 - -commit dc6d2b841f22f3257721d9affba8edc4560f5e7a -Author: Krzysztof Parzyszek -Date: Fri Jun 27 09:44:16 2025 -0500 - - Revert "[flang][OpenMP] Move lowering of ATOMIC to separate file, NFC" (#146091) - - Reverts llvm/llvm-project#146067 - - This still causes timeouts, e.g. - - https://lab.llvm.org/buildbot/#/builders/207/builds/3023/steps/7/logs/stdio - -commit 302ed97b583f0529959b198366ffe892644007ba -Author: Krzysztof Parzyszek -Date: Fri Jun 27 08:19:16 2025 -0500 - - [flang][OpenMP] Move lowering of ATOMIC to separate file, NFC (#146067) - - Reinstate commits e5559ca4 and 925dbc79 with changes that avoid the - reported failures in Windows builds. - - Ref: https://github.com/llvm/llvm-project/pull/144960 - -commit 37e2d104994d36f848664660434b7f9d6e03ac2c -Author: jeanPerier -Date: Fri Jun 27 14:05:59 2025 +0200 - - Revert "[flang] add option to generate runtime type info as external" (#146064) - - Reverts llvm/llvm-project#145901 - - Broke shared library builds because of the usage of - `skipExternalRttiDefinition` in Lowering. - -commit 91f10df794d3293e18a56770acc1fd66fa0b7690 -Author: Akash Banerjee -Date: Fri Jun 27 13:05:22 2025 +0100 - - [Flang][OpenMP] Skip implicit mapping of named constants (#145966) - - Added early return when mapping named constants. 
- - This prevents linking error in the following example: - - ``` - program test - use, intrinsic :: iso_c_binding, only: c_double - implicit none - - real(c_double) :: x - integer :: i - x = 0.0_c_double - !$omp target teams distribute parallel do reduction(+:x) - do i = 0, 9 - x = x + 1.0_c_double - end do - !$omp end target teams distribute parallel do - end program test - ``` - -commit e816817bbb2889a42d8d984736971635d77816f3 -Author: jeanPerier -Date: Fri Jun 27 13:00:29 2025 +0200 - - [flang] add option to generate runtime type info as external (#145901) - - So far flang generates runtime derived type info global definitions (as - opposed to declarations) for all the types used in the current - compilation unit even when the derived types are defined in other - compilation units. It is using linkonce_odr to achieve derived type - descriptor address "uniqueness" aspect needed to match two derived type - inside the runtime. - - This comes at a big compile time cost because of all the extra globals - and their definitions in apps with many and complex derived types. - - This patch adds and experimental option to only generate the rtti - definition for the types defined in the current compilation unit and to - only generate external declaration for the derived type descriptor - object of types defined elsewhere. - - Note that objects compiled with this option are not compatible with - object files compiled without because files compiled without it may drop - the rtti for type they defined if it is not used in the compilation unit - because of the linkonce_odr aspect. - - I am adding the option so that we can better measure the extra cost of - the current approach on apps and allow speeding up some compilation - where devirtualization does not matter (and the build config links to - all module file object anyway). 
- -commit 938cdb30f16f4fefc4c7177d7a47fb571a297c43 -Author: Kazu Hirata -Date: Thu Jun 26 12:41:49 2025 -0700 - - [flang] Migrate away from std::nullopt (NFC) (#145928) - - ArrayRef has a constructor that accepts std::nullopt. This - constructor dates back to the days when we still had llvm::Optional. - - Since the use of std::nullopt outside the context of std::optional is - kind of abuse and not intuitive to new comers, I would like to move - away from the constructor and eventually remove it. - - This patch replaces std::nullopt with {}. There are a couple of - places where std::nullopt is replaced with TypeRange() to accommodate - perfect forwarding. - -commit cfdc4c4a5b671646cb08aeab106103cc7006cb89 -Author: Muhammad Omair Javaid -Date: Thu Jun 26 18:32:20 2025 +0500 - - Revert "[flang][OpenMP] Move lowering of ATOMIC to separate file, NFC (#144960)" - - PR#144960 broke check-flang tests on Windows (x64/ARM64). - - This reverts commit e5559ca45f211f2cdd9c81e46935afe1cc2e22ab. - -commit d0469d1d3c31e919dba97637ac7ab063c44118e5 -Author: Jacques Pienaar -Date: Wed Jun 25 10:58:44 2025 +0200 - - [mlir] Move WalkResult to Support (#145649) - - This also enables moving StateStack, both are relatively generic helper - structs not tied to IR. - -commit 77af8bff97a0b20dac9ff9a95385d036da7d8ba5 -Author: Lance Wang -Date: Tue Jun 24 21:00:13 2025 -0700 - - [mlir]Moves the StateStack to IR folder from Support folder. (#145598) - - [MLIR] Fix circular dependency introduced in In - https://github.com/llvm/llvm-project/pull/144897. This PR is to break - the dependency. 
by moving StateStack to IR folder - - This commit resolves a circular dependency issue between mlir/Support - and mlir/IR: - - - Move StateStack.h and StateStack.cpp from Support to IR folder - - Update CMakeLists.txt files to reflect the new locations - - Update Bazel BUILD file to maintain correct dependencies - - Update includes in affected files (flang, Target/LLVMIR) - - The circular dependency was caused by StateStack.h depending on - IR/Visitors.h - while other IR files depended on Support. Moving StateStack to IR - eliminates - this cycle while maintaining proper separation of concerns. - -commit 8f7f48a97ea53161e046eeb52a8020f228d79a00 -Author: Tom Eccles -Date: Tue Jun 24 18:30:37 2025 +0100 - - [flang][OpenMP][NFC] remove globals with mlir::StateStack (#144898) - - Idea suggested by @skatrak - -commit e970f59e6b20dddc4369735affb79ca9be240c1c -Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> -Date: Fri Jun 20 17:23:00 2025 +0530 - - [flang][OpenMP] Reintroduce TODO for FIR lowering of linear clause (#144883) - - Current design of the linear clause lowering and translation shifts all - responsibility for handling the clause (like privatisation, linear - stepping, finalisation, and emission of synchronisation barriers) to the - IRBuilder. However in certain corner cases (like associated loops in or - before OpenMP version 4.5), variables are implicitly linear. This - currently causes a problem with the existing linear clause - implementation. Hence, re-introduce TODO on the linear clause until the - linear clause lowering/translation are robust enough to handle such - cases as well. 
- - Fixes https://github.com/llvm/llvm-project/issues/142935 - -commit e5559ca45f211f2cdd9c81e46935afe1cc2e22ab -Author: Krzysztof Parzyszek -Date: Fri Jun 20 06:44:14 2025 -0500 - - [flang][OpenMP] Move lowering of ATOMIC to separate file, NFC (#144960) - -commit 9fd22cb56d4c626769afd938e0f9ef6157164394 -Author: Peter Klausler -Date: Thu Jun 19 13:42:46 2025 -0700 - - [flang][NFC] Move new code to right place (#144551) - - Some new code was added to flang/Semantics that only depends on - facilities in flang/Evaluate. Move it into Evaluate and clean up some - minor stylistic problems. - -commit 89efae916a5de0387710b7dc06938423817e1503 -Author: Jack Styles -Date: Thu Jun 19 15:32:27 2025 +0100 - - [Flang][OpenMP] Update default MapType for Map Clauses and OpenMP 5.2 (#144715) - - In OpenMP 5.2, the `target enter data` and `target exit data` constructs - now have default map types if the user does not define them in the Map - clause. For `target enter data`, this is `to` and `target exit data` - this is `from`. This behaviour is now enabled when OpenMP 5.2 or greater - is used when compiling. To enable this, the default value is now set in - the `processMap` clause, with any previous behaviour being maintained - for either older versions of OpenMP or other directives. - - See also #110008 - -commit 936c5566db013225dc098ff961395bb19e1bf2a4 -Author: Krzysztof Parzyszek -Date: Thu Jun 19 07:18:21 2025 -0500 - - [flang][OpenMP] Handle REQUIRES ADMO in lowering (#144362) - - The previous approach rewrote the atomic constructs in the AST based on - the REQUIRES ATOMIC_DEFAULT_MEM_ORDER directives. The new approach - checks for incorrect uses of REQUIRED ADMO in the semantic analysis, and - applies it in lowering, eliminating the need for a separate - tree-rewriting procedure. 
- -commit 97e17e15957bf6f03923ca46301b32cad507f34b -Author: Kareem Ergawy -Date: Tue Jun 17 11:34:05 2025 +0200 - - Revert "[flang] Enable delayed localization by default for `do concurrent` (#144074)" (#144476) - - This reverts commit b5dbf8210a57b986b9802304745f4c5c108cf37b. - - Reverting again due to gfortran failure: - https://lab.llvm.org/buildbot/#/builders/17/builds/8868 - -commit 2dc58e02cbce83784a38b4cc33f83529ad1a7c7e -Author: Kareem Ergawy -Date: Tue Jun 17 07:01:53 2025 +0200 - - [flang][OpenMP] Add symbol table scopes for `teams` and `parallel` (#144015) - - Adds symbol map scopes for standalone `teams` and `parallel` constructs. - This is required to properly bind the privatized symbols in both - constructs so that nested constructs can find them. - - Resolves https://github.com/llvm/llvm-project/issues/116428. - -commit b5dbf8210a57b986b9802304745f4c5c108cf37b -Author: Kareem Ergawy -Date: Tue Jun 17 06:08:38 2025 +0200 - - [flang] Enable delayed localization by default for `do concurrent` (#144074) - - Reintroduces changes from - https://github.com/llvm/llvm-project/issues/143897. A fix for the - reported problem in https://github.com/llvm/llvm-project/issues/143897 - is hopefully resolved in - https://github.com/llvm/llvm-project/pull/144027. - - This PR aims to make it easier and more self-contained to revert the - switch/flag if we discover any problems with enabling it by default. - -commit 7caeec599998bd8aa01d498574e148e4e9c982db -Author: Kareem Ergawy -Date: Tue Jun 17 06:08:15 2025 +0200 - - [NFC][flang][OpenMP] Unify `genSectionsOp`'s prototype to match other `genXXXOp` functions (#144013) - - Unifies the prototype of `genSectionsOp` to match other ops generators. - Doing so, we are able to call `genSectionsOp` directly from - `genOMPDispatch` instead of the special handling needed now to pass the - section blocks. This is useful because now we can handle symbol mapping - scopes easier for nested OpenMP directives. 
See - - https://github.com/llvm/llvm-project/pull/143706#issuecomment-2965344723 - and the following discussion for more info. - -commit f12b1ed11672bc40a53fb1180541b2fda6e7d9fc -Author: Kajetan Puchalski -Date: Thu Jun 12 16:35:36 2025 +0100 - - [flang][OpenMP] Add TODOs for target [teams|parallel] private (#143706) - - Using the private clause on `target teams` or `target parallel` is not - currently implemented and causes crashes during lowering. Add - appropriate TODOs. - - Resolves https://github.com/llvm/llvm-project/issues/116428. - - Signed-off-by: Kajetan Puchalski - -commit 4bd0a0e50bcfc3263c219acc9709ae234a334456 -Author: Kareem Ergawy -Date: Thu Jun 12 17:09:55 2025 +0200 - - Revert "[flang] Enable delayed localization by default for `do concurrent` (#142567)" (#143905) - - This reverts commit 937be177528de156922c1b5f6cab08ba3009dbf2. - - Resolves https://github.com/llvm/llvm-project/issues/143897 until the - todo is properly handled. - -commit 91be47dccfa3480c152916838404d49107fde45c -Author: Kazu Hirata -Date: Wed Jun 11 08:53:54 2025 -0700 - - [flang] Fix warnings - - This patch fixes: - - flang/lib/Lower/OpenMP/OpenMP.cpp:3904:9: error: unused variable - 'action0' [-Werror,-Wunused-variable] - - flang/lib/Lower/OpenMP/OpenMP.cpp:3905:9: error: unused variable - 'action1' [-Werror,-Wunused-variable] - -commit 141d390dcb6cd174b07ca663e58f37ab24eee08a -Author: Krzysztof Parzyszek -Date: Wed Jun 11 10:05:34 2025 -0500 - - [flang][OpenMP] Overhaul implementation of ATOMIC construct (#137852) - - The parser will accept a wide variety of illegal attempts at forming an - ATOMIC construct, leaving it to the semantic analysis to diagnose any - issues. This consolidates the analysis into one place and allows us to - produce more informative diagnostics. - - The parser's outcome will be parser::OpenMPAtomicConstruct object - holding the directive, parser::Body, and an optional end-directive. 
The - prior variety of OmpAtomicXyz classes, as well as OmpAtomicClause have - been removed. READ, WRITE, etc. are now proper clauses. - - The semantic analysis consistently operates on "evaluation" - representations, mainly evaluate::Expr (as SomeExpr) and - evaluate::Assignment. The results of the semantic analysis are stored in - a mutable member of the OpenMPAtomicConstruct node. This follows a - precedent of having `typedExpr` member in parser::Expr, for example. - This allows the lowering code to avoid duplicated handling of AST nodes. - - Using a BLOCK construct containing multiple statements for an ATOMIC - construct that requires multiple statements is now allowed. In fact, any - nesting of such BLOCK constructs is allowed. - - This implementation will parse, and perform semantic checks for both - conditional-update and conditional-update-capture, although no MLIR will - be generated for those. Instead, a TODO error will be issued prior to - lowering. - - The allowed forms of the ATOMIC construct were based on the OpenMP 6.0 - spec. - -commit 937be177528de156922c1b5f6cab08ba3009dbf2 -Author: Kareem Ergawy -Date: Wed Jun 11 10:10:22 2025 +0200 - - [flang] Enable delayed localization by default for `do concurrent` (#142567) - - This PR aims to make it easier and more self-contained to revert the - switch/flag if we discover any problems with enabling it by default. - -commit b994a4c04f38d8cfb13f3dbf3d99146cb778443e -Author: Peter Klausler -Date: Tue Jun 10 14:44:41 2025 -0700 - - [flang][NFC] Clean up code in two new functions (#142037) - - Two recently-added functions in Semantics/tools.h need some cleaning up - to conform to the coding style of the project. One of them should - actually be in Parser/tools.{h,cpp}, the other doesn't need to be - defined in the header. 
- -commit bac4aa440c12b2f90a1e12ab8aa6e3f842beb387 -Author: Kareem Ergawy -Date: Thu Jun 5 01:01:53 2025 +0200 - - [flang] Extend localization support for `do concurrent` (`init` regions) (#142564) - - Extends support for locality specifiers in `do concurrent` by supporting - data types that need `init` regions. - - This further unifies the paths taken by the compiler for OpenMP - privatization clauses and `do concurrent` locality specifiers. - -commit aac1f85393e74b643d08c948c3c2da156a231073 -Author: Leandro Lupori -Date: Tue Jun 3 10:58:23 2025 -0300 - - [flang][OpenMP] Explicitly set Shared DSA in symbols (#142154) - - Before this change, OmpShared was not always set in shared symbols. - Instead, absence of private flags was interpreted as shared DSA. - The problem was that symbols with no flags, with only a host - association, could also mean "has same DSA as in the enclosing - context". Now shared symbols behave the same as private and can be - treated the same way. - - Because of the host association symbols with no flags mentioned - above, it was also incorrect to simply test the flags of a given - symbol to find out if it was private or shared. The function - GetSymbolDSA() was added to fix this. It would be better to avoid - the need of these special symbols, but this would require changes - to how symbols are collected in lowering. - - Besides that, some semantic checks need to know if a DSA clause - was used or not. To avoid confusing implicit symbols with DSA - clauses a new flag was added: OmpExplicit. It is now set for all - symbols with explicitly determined data-sharing attributes. - - With the changes above, AddToContextObjectWithDSA() and the symbol - to DSA map could probably be removed and the DSA could be obtained - directly from the symbol, but this was not attempted. 
- - Some debug messages were also added, with the "omp" DEBUG_TYPE, to - make it easier to debug the creation of implicit symbols and to - visualize all associations of a given symbol. - - Fixes #130533 - Fixes #140882 - -commit 99ae675fb7957f3eb8b65e9086dae4bbc722f221 -Author: Akash Banerjee -Date: Fri May 30 14:39:03 2025 +0100 - - [NFC][OpenMP] Move the default declare mapper name suffix to OMPConstants.h (#141964) - - This patch moves the default declare mapper name suffix - ".omp.default.mapper" to the OMPConstants.h file to be used everywhere - for lowering. - -commit f8dcb059ae06376b0991936026d5befb3d7b109b -Author: Kareem Ergawy -Date: Thu May 29 13:13:44 2025 +0200 - - [flang][fir][OpenMP] Refactor privatization code into shared location (#141767) - - Refactors the utils needed to create privatization/localization ops for - both the fir and OpenMP dialects into a shared location isolating OpenMP - stuff out of it as much as possible. - -commit 7e9887a50df2de9c666f5e7ceb46c27bfccc618f -Author: Kareem Ergawy -Date: Thu May 29 12:27:03 2025 +0200 - - [flang] Generalize names of delayed privatization CLI flags (#138816) - - Remove the `openmp` prefix from delayed privatization/localization flags - since they are now used for `do concurrent` as well. - - PR stack: - - https://github.com/llvm/llvm-project/pull/137928 - - https://github.com/llvm/llvm-project/pull/138505 - - https://github.com/llvm/llvm-project/pull/138506 - - https://github.com/llvm/llvm-project/pull/138512 - - https://github.com/llvm/llvm-project/pull/138534 - - https://github.com/llvm/llvm-project/pull/138816 (this PR) - -commit e33cd9690fe11305acd7df35532d712844b9049e -Author: Kareem Ergawy -Date: Thu May 29 11:04:27 2025 +0200 - - [flang][fir] Basic PFT to MLIR lowering for do concurrent locality specifiers (#138534) - - Extends support for `fir.do_concurrent` locality specifiers to the PFT - to MLIR level. 
This adds code-gen for generating the newly added - `fir.local` ops and referencing these ops from `fir.do_concurrent.loop` - ops that have locality specifiers attached to them. This reuses the - `DataSharingProcessor` component and generalizes it a bit more to allow - for handling `omp.private` ops and `fir.local` ops as well. - - - PR stack: - - https://github.com/llvm/llvm-project/pull/137928 - - https://github.com/llvm/llvm-project/pull/138505 - - https://github.com/llvm/llvm-project/pull/138506 - - https://github.com/llvm/llvm-project/pull/138512 - - https://github.com/llvm/llvm-project/pull/138534 (this PR) - - https://github.com/llvm/llvm-project/pull/138816 - -commit 59b7b5b6b5c032ed21049d631eb5d67091f3a21c -Author: Akash Banerjee -Date: Wed May 28 14:32:17 2025 +0100 - - [OpenMP][Flang] Fix semantic check and scoping for declare mappers (#140560) - - The current semantic check in place is incorrect, this patch fixes this. - - Up to 1 **'default'** named mapper should be allowed for each derived - type. - The current semantic check only allows up to 1 **'default'** named - mapper across all derived types. - - This also makes sure that declare mappers follow proper scoping rules - for both default and named mappers. - - Co-authored-by: Raghu Maddhipatla - -commit 5530474e3e84edd02c85043c60e4df967fee7f26 -Author: Yang Zaizhou <91008302+Mxfg-incense@users.noreply.github.com> -Date: Fri May 23 20:15:10 2025 +0800 - - [Flang][OpenMP] fix crash on sematic error in atomic capture clause (#140710) - - Fix a crash caused by an invalid expression in the atomic capture - clause, due to the `checkForSymbolMatch` function not accounting for - `GetExpr` potentially returning null. 
- - Fix https://github.com/llvm/llvm-project/issues/139884 - -commit 0baacd1a58420f7e4da14faa1f0e9a21d5294a6a -Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> -Date: Tue May 20 12:03:06 2025 +0530 - - [flang][OpenMP] Support MLIR lowering of linear clause for omp.wsloop (#139385) - - This patch adds support for MLIR lowering of linear clause on omp.wsloop - (except for linear modifiers). - -commit bbb7f0148177d332df80b5cfdc7d161dca289056 -Author: Asher Mancinelli -Date: Tue May 13 08:13:47 2025 -0700 - - [flang] Fix volatile attribute propagation on allocatables (#139183) - - Ensure volatility is reflected not just on the reference to an - allocatable, but on the box, too. When we declare a volatile - allocatable, we now get a volatile reference to a volatile box. - - Some related cleanups: - * SELECT TYPE constructs check the selector's type for volatility when - creating and designating the type used in the selecting block. - * Refine the verifier for fir.convert. In general, I think it is ok to - implicitly drop volatility in any ptr-to-int conversion because it means - we are in codegen (and representing volatility on the LLVM ops and - intrinsics) or we are calling an external function (are there any cases - I'm not thinking of?) - * An allocatable test that was XFAILed is now passing. Making - allocatables' boxes volatile resulted in accesses of those boxes being - volatile, which resolved some errors coming from the strict verifier. - * I noticed a runtime function was missing the fir.runtime attribute. - -commit 8a9e767fa690e0232db2cfa8576f53b48ae53f30 -Author: Pranav Bhandarkar -Date: Mon May 12 22:34:58 2025 -0500 - - [Flang][MLIR] - Handle the mapping of subroutine arguments when they are subsequently used inside the region of an `omp.target` Op (#134967) - - This is a fix for https://github.com/llvm/llvm-project/issues/134912 - which is a problem with mapping `fir.boxchar` type values to the - target i.e an `omp.target` op. 
- - There really are two problems. Fixing the first exposed the second. The - first problem is that OpenMP lowering of maps in `omp.target` in Flang - cannot handle the mapping of a value that doesnt have a defining - operation. In other words, a value that is a block argument. This is handled - by mapping the value using a `MapInfoOp`. - The second problem this fixes is that it adds bounds to `omp.map.info` - ops that map `fir.char` types by extracting the length from the - corresponding `fir.boxchar` - -commit 09b772e2efad804fdda02e2bd9ee44a2aaaddeeb -Author: Slava Zakharin -Date: Mon May 12 14:03:15 2025 -0700 - - [flang] Postpone hlfir.end_associate generation for calls. (#138786) - - If we generate hlfir.end_associate at the end of the statement, - we get easier optimizable HLFIR, because there are no compiler - generated operations with side-effects in between the call - and the consumers. This allows more hlfir.eval_in_mem to reuse - the LHS instead of allocating temporary buffer. - - I do not think the same can be done for hlfir.copy_out always, e.g.: - ``` - subroutine test2(x) - interface - function array_func2(x,y) - real:: x(*), array_func2(10), y - end function array_func2 - end interface - real :: x(:) - x = array_func2(x, 1.0) - end subroutine test2 - ``` - - If we postpone the copy-out until after the assignment, then - the result may be wrong. - -commit eef4b5a0cdf102e5035d6d4f1aa5f85b2b787e84 -Author: Zhen Wang <37195552+wangzpgi@users.noreply.github.com> -Date: Mon May 12 10:06:39 2025 -0700 - - [flang] [cuda] Fix CUDA implicit data transfer entity creation (#139414) - - Fixed an issue in `genCUDAImplicitDataTransfer` where creating an - `hlfir::Entity` from a symbol address could fail when the address comes - from a `hlfir.declare` operation. Fix is to check if the address comes - from a `hlfir.declare` operation. If so, use the base value from the - declare op when available. Falling back to the original address - otherwise. 
- -commit 939bb4e028499a3eda783567cda7d5331ba0c242 -Author: agozillon -Date: Mon May 12 10:49:26 2025 -0500 - - [NFC] Add const to newly added helper functions from PR #135226 - -commit f687ed9ff717372a7c751a3bf4ef7e33eb481fd6 -Author: agozillon -Date: Mon May 12 16:30:43 2025 +0200 - - [Flang][OpenMP] Initial defaultmap implementation (#135226) - - This aims to implement most of the initial arguments for defaultmap - aside from firstprivate and none, and some of the more recent OpenMP 6 - additions which will come in subsequent updates (with the OpenMP 6 - variants needing parsing/semantic support first). - -commit 4d9479fa8f4e949bc4c5768477cd36687c1c6b29 -Author: Andre Kuhlenschmidt -Date: Fri May 9 11:12:24 2025 -0700 - - [flang][openacc] Allow open acc routines from other modules. (#136012) - - OpenACC routines annotations in separate compilation units currently get - ignored, which leads to errors in compilation. There are two reason for - currently ignoring open acc routine information and this PR is - addressing both. - - The module file reader doesn't read back in openacc directives from - module files. - - Simple fix in `flang/lib/Semantics/mod-file.cpp` - - The lowering to HLFIR doesn't generate routine directives for symbols - imported from other modules that are openacc routines. - - This is the majority of this diff, and is address by the changes that - start in `flang/lib/Lower/CallInterface.cpp`. - -commit 7aed77ef954f83cc52dad3eba4f51470e21b1cb0 -Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> -Date: Fri May 9 21:55:21 2025 +0530 - - [flang][OpenMP] Add implicit casts for omp.atomic.capture (#138163) - - This patch adds support for emitting implicit casts for atomic capture - if its constituent operations have different yet compatible types. 
- - Fixes: https://github.com/llvm/llvm-project/issues/138123 and - https://github.com/llvm/llvm-project/issues/94177 - -commit a68f35a17db03a6633a660d310156f4e2f17197f -Author: Krzysztof Parzyszek -Date: Fri May 9 07:42:15 2025 -0500 - - [flang][OpenMP] Pass OpenMP version to getOpenMPDirectiveName (#139131) - - The OpenMP version is stored in LangOptions in SemanticsContext. Use the - fallback version where SemanticsContext is unavailable (mostly in case - of debug dumps). - - RFC: - https://discourse.llvm.org/t/rfc-alternative-spellings-of-openmp-directives/85507 - - Reland with a fix for build break in f18-parse-demo. - -commit 89822ff5a8608570897c21a3c40fb450c53f603f -Author: Krzysztof Parzyszek -Date: Fri May 9 07:55:13 2025 -0500 - - Revert "[flang][OpenMP] Pass OpenMP version to getOpenMPDirectiveName (#139131)" - - This reverts commit 41aa67488c3ca33334ec79fb5216145c3644277c. - - Breaks build: https://lab.llvm.org/buildbot/#/builders/140/builds/22826 - -commit 41aa67488c3ca33334ec79fb5216145c3644277c -Author: Krzysztof Parzyszek -Date: Fri May 9 07:42:15 2025 -0500 - - [flang][OpenMP] Pass OpenMP version to getOpenMPDirectiveName (#139131) - - The OpenMP version is stored in LangOptions in SemanticsContext. Use the - fallback version where SemanticsContext is unavailable (mostly in case - of debug dumps). 
- - RFC: - https://discourse.llvm.org/t/rfc-alternative-spellings-of-openmp-directives/85507 - -commit b291cfcad4815568dc1eaca58185d25dceed3f1c -Author: agozillon -Date: Fri May 9 13:57:45 2025 +0200 - - [Flang][OpenMP] Generate correct present checks for implicit maps of optional allocatables (#138210) - - Currently, we do not generate the appropriate checks to check if an - optional - allocatable argument is present before accessing relevant components of - it, - in particular when creating bounds, we must generate a presence check - and we - must make sure we do not generate/keep an load external to the presence - check - by utilising the raw address rather than the regular address of the info - data structure. - - Similarly in cases for optional allocatables we must treat them like - non-allocatable - arguments and generate an intermediate allocation that we can have as a - location - in memory that we can access later in the lowering without causing - segfaults when - we perform "mapping" on it, even if the end result is an empty - allocatable - (basically, we shouldn't explode if someone tries to map a non-present - optional, - similar to C++ when mapping null data). - -commit dd42112c82d7b12669513dca4048167664b211b2 -Author: Kaviya Rajendiran <67495422+kaviya2510@users.noreply.github.com> -Date: Fri May 9 17:26:28 2025 +0530 - - [Flang][OpenMP] MLIR lowering support for grainsize and num_tasks clause (#128490) - - - Added MLIR lowering for grainsize and num_tasks clauses of taskloop construct. 
- -commit 227e1ff73b6c0cbdd912c69405777f7121dc0760 -Author: Kareem Ergawy -Date: Thu May 8 21:42:52 2025 +0200 - - [flang][fir] Add locality specifiers modeling to `fir.do_concurrent.loop` (#138506) - -commit 2a32d738bb213a8a1e814b65beb61e39b7c66834 -Author: Tom Eccles -Date: Thu May 8 10:08:49 2025 +0100 - - [flang][OpenMP] fix predetermined privatization inside section (#138159) - - This now produces code equivalent to if there was an explicit private - clause on the SECTIONS construct. - - The problem was that each SECTION construct got its own DSP, which tried - to privatize the same symbol for that SECTION. Privatization for - SECTION(S) happens on the outer SECTION construct and so the outer - construct's DSP should be shared. - - Fixes #135108 - -commit 2fb288d4b8e0fb6c08a1a72b64cbf6a0752fdac7 -Author: Kareem Ergawy -Date: Wed May 7 12:52:25 2025 +0200 - - [flang][fir] Lower `do concurrent` loop nests to `fir.do_concurrent` (#137928) - - Adds support for lowering `do concurrent` nests from PFT to the new - `fir.do_concurrent` MLIR op as well as its special terminator - `fir.do_concurrent.loop` which models the actual loop nest. - - To that end, this PR emits the allocations for the iteration variables - within the block of the `fir.do_concurrent` op and creates a region for - the `fir.do_concurrent.loop` op that accepts arguments equal in number - to the number of the input `do concurrent` iteration ranges. 
- - For example, given the following input: - ```fortran - do concurrent(i=1:10, j=11:20) - end do - ``` - the changes in this PR emit the following MLIR: - ```mlir - fir.do_concurrent { - %22 = fir.alloca i32 {bindc_name = "i"} - %23:2 = hlfir.declare %22 {uniq_name = "_QFsub1Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) - %24 = fir.alloca i32 {bindc_name = "j"} - %25:2 = hlfir.declare %24 {uniq_name = "_QFsub1Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) - fir.do_concurrent.loop (%arg1, %arg2) = (%18, %20) to (%19, %21) step (%c1, %c1_0) { - %26 = fir.convert %arg1 : (index) -> i32 - fir.store %26 to %23#0 : !fir.ref - %27 = fir.convert %arg2 : (index) -> i32 - fir.store %27 to %25#0 : !fir.ref - } - } - ``` - -commit 75e5643abf6b59db8dfae6b524e9c3c2ec0ffc29 -Author: Tom Eccles -Date: Wed May 7 10:18:13 2025 +0100 - - [flang][OpenMP] share global variable initialization code (#138672) - - Fixes #108136 - - In #108136 (the new testcase), flang was missing the length parameter - required for the variable length string when boxing the global variable. - The code that is initializing global variables for OpenMP did not - support types with length parameters. - - Instead of duplicating this initialization logic in OpenMP, I decided to - use the exact same initialization as is used in the base language - because this will already be well tested and will be updated for any new - types. The difference for OpenMP is that the global variables will be - zero initialized instead of left undefined. - - Previously `Fortran::lower::createGlobalInitialization` was used to - share a smaller amount of the logic with the base language lowering. I - think this bug has demonstrated that helper was too low level to be - helpful, and it was only used in OpenMP so I have made it static inside - of ConvertVariable.cpp. 
- -commit a13c0b67708173b8033a53ff6ae4c46c5b80bb2b -Author: Kiran Chandramohan -Date: Wed May 7 09:56:45 2025 +0100 - - [Flang][OpenMP] Add frontend support for declare variant (#130578) - - Support is added for parsing. Basic semantics support is added to - forward the code to Lowering. Lowering will emit a TODO error. Detailed - semantics checks and lowering is further work. - -commit e1fed24034fee3f45bc17252ced5ee29ab6b5408 -Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> -Date: Wed May 7 11:14:48 2025 +0530 - - [flang][OpenMP] Fix fir.convert in omp.atomic.update region (#138397) - - Region generation in omp.atomic.update currently emits a direct - `fir.convert`. This crashes when the RHS expression involves complex - type but the LHS variable is primitive type (say `f32`), since a - `fir.convert` from `complex` to `f32` is emitted, which is illegal. - This PR adds a conditional check to emit an additional `ExtractValueOp` - in case RHS expression has a complex type. - - Fixes https://github.com/llvm/llvm-project/issues/138396 - -commit 9e7d529607ebde67af5b214a654de82cfa2ec8c4 -Author: Kaviya Rajendiran <67495422+kaviya2510@users.noreply.github.com> -Date: Wed May 7 10:25:56 2025 +0530 - - [Flang][OpenMP]Support for lowering task_reduction and in_reduction to MLIR (#111155) - - This patch, - - Added support for lowering of task_reduction to MLIR - - Added support for lowering of in_reduction to MLIR - - Fixed incorrect DSA handling for variables in the presence of 'in_reduction' clause. - -commit e356893551b315c84f30f7828eb493c4ef02e118 -Author: Kaviya Rajendiran <67495422+kaviya2510@users.noreply.github.com> -Date: Tue May 6 19:47:22 2025 +0530 - - [Flang][OpenMP] Support for lowering of taskloop construct to MLIR (#138646) - - Added support for lowering of taskloop construct and its clauses(Private - and Firstprivate) to MLIR. 
- -commit c61746650178c117996e1787617f36ccda7233f7 -Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> -Date: Thu May 1 20:50:42 2025 +0530 - - [flang][llvm][OpenMP] Add implicit casts to omp.atomic (#131603) - - Currently, implicit casts in Fortran are handled by the OMPIRBuilder. - This patch shifts that responsibility to FIR codegen. - -commit 8836bce84208737f5807c396345a41e688d3ec11 -Author: Asher Mancinelli -Date: Wed Apr 30 08:46:33 2025 -0700 - - [flang] Add lowering of volatile references (#132486) - - [RFC on - discourse](https://discourse.llvm.org/t/rfc-volatile-representation-in-flang/85404/1) - - Flang currently lacks support for volatile variables. For some cases, - the compiler produces TODO error messages and others are ignored. Some - of our tests are like the example from _C.4 Clause 8 notes: The VOLATILE - attribute (8.5.20)_ and require volatile variables. - - Prior commits: - ``` - c9ec1bc753b0 [flang] Handle volatility in lowering and codegen (#135311) - e42f8609858f [flang][nfc] Support volatility in Fir ops (#134858) - b2711e1526f9 [flang][nfc] Support volatile on ref, box, and class types (#134386) - ``` - -commit 7dd8122d4ea147a2e8b90d611e30d4c2cff4619f -Author: Pranav Bhandarkar -Date: Tue Apr 29 14:53:15 2025 -0500 - - [Flang][MLIR][OpenMP] - Add support for firstprivate when translating omp.target ops from MLIR to LLVMIR (#131213) - - This patch adds support to translate `firstprivate` clauses on `omp.target` ops when translating from MLIR to LLVMIR. - Presently, this PR is restricted to supporting only included tasks, i.e `#omp target nowait firstprivate(some_variable)` will likely not work correctly even if it produces object code. 
- -commit 9ea5254f77ae5d5fe8e81f8e39b5d461cc95e9dc -Author: Krzysztof Parzyszek -Date: Mon Apr 28 15:43:39 2025 -0500 - - [flang][OpenACC][OpenMP] Separate implementations of ATOMIC constructs (#137517) - - The OpenMP implementation of the ATOMIC construct will change in the - near future to accommodate atomic conditional-update and conditional- - update-capture operations. This patch separates the shared implemen- - tations to avoid interfering with OpenACC. - -commit 46e734746db7176f6e32b3c98beacf1e94fced37 -Author: Valentin Clement (バレンタイン クレメン) -Date: Mon Apr 21 19:22:07 2025 -0700 - - [flang][cuda] Update stream type for cuf kernel op (#136627) - - Update the type of the stream operand to be similar to KernelLaunchOp. - -commit 50db7a7d269b42f0cda63eb005aadfdbe25f56cb -Author: Slava Zakharin -Date: Fri Apr 18 17:19:12 2025 -0700 - - [flang] Fixed fir.dummy_scope generation to work for TBAA. (#136382) - - The nesting of fir.dummy_scope operations defines the roots - of the TBAA forest. If we do not generate fir.dummy_scope - in functions that do not have any dummy arguments, then - the globals accessed in the function and the dummy arguments - accessed by the callee may end up in different sub-trees - of the same root. The added tbaa-with-dummy-scope2.fir - demonstrates the issue. - -commit 30990c09c99bdcbfa7084d32b2b9851e19b6fb2a -Author: Kareem Ergawy -Date: Wed Apr 16 14:20:27 2025 +0200 - - Revert "[flang][fir] Lower `do concurrent` loop nests to `fir.do_concurrent` (#132904)" (#135904) - - This reverts commit 04b87e15e40f8857e29ade8321b8b67691545a50. - - The reasons for reverting is that the following: - 1. I still need need to upstream some part of the do concurrent to - OpenMP pass from our downstream implementation and taking this in - downstream will make things more difficult. - 2. I still need to work on a solution for modeling locality specifiers - on `hlfir.do_concurrent` ops. 
I would prefer to do that and merge the - entire stack together instead of having a partial solution. - - After merging the revert I will reopen the origianl PR and keep it - updated against main until I finish the above. - -commit 04b87e15e40f8857e29ade8321b8b67691545a50 -Author: Kareem Ergawy -Date: Wed Apr 16 06:14:38 2025 +0200 - - [flang][fir] Lower `do concurrent` loop nests to `fir.do_concurrent` (#132904) - - Adds support for lowering `do concurrent` nests from PFT to the new - `fir.do_concurrent` MLIR op as well as its special terminator - `fir.do_concurrent.loop` which models the actual loop nest. - - To that end, this PR emits the allocations for the iteration variables - within the block of the `fir.do_concurrent` op and creates a region for - the `fir.do_concurrent.loop` op that accepts arguments equal in number - to the number of the input `do concurrent` iteration ranges. - - For example, given the following input: - ```fortran - do concurrent(i=1:10, j=11:20) - end do - ``` - the changes in this PR emit the following MLIR: - ```mlir - fir.do_concurrent { - %22 = fir.alloca i32 {bindc_name = "i"} - %23:2 = hlfir.declare %22 {uniq_name = "_QFsub1Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) - %24 = fir.alloca i32 {bindc_name = "j"} - %25:2 = hlfir.declare %24 {uniq_name = "_QFsub1Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) - fir.do_concurrent.loop (%arg1, %arg2) = (%18, %20) to (%19, %21) step (%c1, %c1_0) { - %26 = fir.convert %arg1 : (index) -> i32 - fir.store %26 to %23#0 : !fir.ref - %27 = fir.convert %arg2 : (index) -> i32 - fir.store %27 to %25#0 : !fir.ref - } - } - ``` - -commit 4c09ae0b2ed6a99e4e69ec9e0507c26cdcc301a9 -Author: Tom Eccles -Date: Tue Apr 8 10:29:18 2025 +0100 - - [flang][OpenMP] Lowering for CANCEL and CANCELLATIONPOINT (#134248) - - These will still hit TODOs in OpenMPToLLVMIRConversion.cpp - -commit 446d4f51eb1a172776e69ffb51b5972a0225c0a1 -Author: Tom Eccles -Date: Tue Apr 8 10:27:27 2025 +0100 - - [flang][OpenMP][Lower] fix 
statement context cleanup insertion point (#133891) - - The statement context is used for lowering clauses for openmp operations - using generalised helpers from flang lowering. The statement context - stores closures which generate code for cleaning up temporary values - generated by the lowering helper. These closures are run when the - statement construct is destroyed. Keeping the statement context local to - the clause or operation being lowered without any special handling was - not correct because any cleanup code would be generated at the insertion - point when that statement context went out of scope (which would in - general be inside of the newly created container operation). It would be - better to generate the cleanup code after the newly created operation - (clause processing is synchronous even for deferred tasks). - - Currently supported clauses are mostly populated with simple scalar - values that require no cleanup. Even the simple array sections added by - #132994 needed no cleanup because indexing the right values of the array - did not create any temporaries. Supporting array sections with vector - indexing will generate hlfir.destroy operations for cleanup. This patch - fixes where those will be created. Those hlfir.destroy operations don't - generate any FIR (or LLVM) code, but the issue still exists - theoretically. - - I wasn't able to find any clauses which have any cleanup to use to test - this PR. It is probably NFC for the current lowering. This will be - tested in [the PR adding vector subscripting of array - sections](https://github.com/llvm/llvm-project/pull/133892). - -commit 8f0d8d28ccd8a1ced82a744679c5152f90e80c77 -Author: Zhen Wang <37195552+wangzpgi@users.noreply.github.com> -Date: Sun Apr 6 19:31:09 2025 -0700 - - Delete duplicated hlfir.declare op of induction variables of do concurrent when inside cuf kernel directive. 
(#134467) - - Delete duplicated creation of hlfir.declare op of do concurrent - induction variables when inside cuf kernel directive. - Obtain the correct hlfir.declare op generated from bindSymbol, and add - it to ivValues. - -commit 18dd299fb109792d0716156af0a2d8c0ca781c57 -Author: Sergio Afonso -Date: Thu Apr 3 15:06:19 2025 +0100 - - [Flang][MLIR][OpenMP] Host-evaluation of omp.loop bounds (#133908) - - This patch updates Flang lowering and kernel flags identification in - MLIR so that loop bounds on `target teams loop` constructs are evaluated - on the host, making the trip count available to the corresponding - `__tgt_target_kernel` call emitted for the target region. - - This is necessary in order to properly execute these constructs as - `target teams distribute parallel do`. - - Co-authored-by: Kareem Ergawy - -commit c309abd92553191c404f2dc13f637bcfd53033f9 -Author: Jean-Didier PAILLEUX -Date: Wed Apr 2 14:30:01 2025 +0200 - - [flang] Implement !DIR$ NOVECTOR and !DIR$ NOUNROLL[_AND_JAM] (#133885) - - Hi, - This patch implements support for the following directives : - - `!DIR$ NOUNROLL_AND_JAM` to disable unrolling and jamming on a DO - LOOP. - - `!DIR$ NOUNROLL` to disable unrolling on a DO LOOP. - - `!DIR$ NOVECTOR` to disable vectorization on a DO LOOP. - -commit e17d864f55133d46e12614280951ddb2dc43cc74 -Author: Tom Eccles -Date: Tue Apr 1 10:26:14 2025 +0100 - - [flang][OpenMP][Lower] lower array subscripts for task depend (#132994) - - The OpenMP standard says that all dependencies in the same set of - inter-dependent tasks must be non-overlapping. This simplification means - that the OpenMP only needs to keep track of the base addresses of - dependency variables. This can be seen in kmp_taskdeps.cpp, which stores - task dependency information in a hash table, using the base address as a - key. - - This patch generates a rebox operation to slice boxed arrays, but only - the box data address is used for the task dependency. 
The extra box is - optimized away by LLVM at O3. - - Vector subscripts are TODO (I will address in my next patch). - - This also fixes a bug for ordinary subscripts when the symbol was mapped - to a box: - - Fixes #132647 - -commit 091dcb8fc2b6ccb88c2975076e94f3cb6530db46 -Author: Thirumalai Shaktivel <74826228+Thirumalai-Shaktivel@users.noreply.github.com> -Date: Tue Apr 1 11:35:44 2025 +0530 - - [Flang] Make a private copy for the common block variables in copyin clause (#111359) - - Fixes: https://github.com/llvm/llvm-project/issues/82949 - -commit fe30cf18ab3eb1aba5ea7e44574e27fdde791c1d -Author: swatheesh-mcw -Date: Fri Mar 28 20:51:52 2025 +0530 - - Revert "Revert "[flang][openmp] Adds Parser and Semantic Support for Interop Construct, and Init and Use Clauses."" (#132343) - - Reverts llvm/llvm-project#132005 - -commit 123eb75cd43a5181c324efc033e978f0f1ed4598 -Author: Michael Kruse -Date: Fri Mar 21 12:32:54 2025 +0100 - - [Flang] Do not emit numeric_storage_size into object file (#131463) - - The value of numeric_storage_size depends on compilation options and - therefore its value is not yet known when building the builtins runtime. - Instead, the parameter is folding a __numeric_storage_size() expression - which is loaded into the user program. For the iso_fortran_env object - file, omit the symbol as it is never used. - - Similar tests that ensure that __numeric_storage_size() is not folded - until compiling the actual user program exist in FortranEvalutate: - - https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/flang/lib/Evaluate/check-expression.cpp#L487-L492 - - https://github.com/llvm/llvm-project/blob/1e6ba3cd2fe96be00b6ed6ba28b3d9f9271d784d/flang/lib/Evaluate/fold-integer.cpp#L1457-L1460 - - Required for using CMake to compile the builtin module files. 
See RFC at - https://discourse.llvm.org/t/rfc-building-flangs-builtin-mod-files/84626 - -commit 68180d8d16f07db8200dfce7bae26a80c43ebc5e -Author: Krzysztof Parzyszek -Date: Thu Mar 20 06:50:43 2025 -0500 - - [flang][OpenMP] Use OmpDirectiveSpecification in standalone directives (#131163) - - This uses OmpDirectiveSpecification in the rest of the standalone - directives. - -commit ac9e4e9b3320b8dc63abfbdca8b7561e372ec8c7 -Author: Sergio Afonso -Date: Wed Mar 19 17:29:40 2025 +0000 - - [Flang][OpenMP] Simplify entry block creation for BlockArgOpenMPOpInterface ops, NFC (#132036) - - This patch adds the `OpWithBodyGenInfo::blockArgs` field and updates - `createBodyOfOp()` to prevent the need for `BlockArgOpenMPOpInterface` - operations to pass the same callback, minimizing chances of introducing - inconsistent behavior. - -commit cd26dd55959c11c1cd0ea4fe1f07e0fa9cb7a72a -Author: Krzysztof Parzyszek -Date: Wed Mar 19 11:34:40 2025 -0500 - - [flang][OpenMP] Use OmpDirectiveSpecification in simple directives (#131162) - - The `OmpDirectiveSpecification` contains directive name, the list of - arguments, and the list of clauses. It was introduced to store the - directive specification in METADIRECTIVE, and could be reused everywhere - a directive representation is needed. - In the long term this would unify the handling of common directive - properties, as well as creating actual constructs from METADIRECTIVE by - linking the contained directive specification with any associated user - code. - -commit 96b112fb613e216a198ae2e956a367742c838eed -Author: Kiran Chandramohan -Date: Wed Mar 19 11:13:52 2025 +0000 - - Revert "[flang][openmp] Adds Parser and Semantic Support for Interop Construct, and Init and Use Clauses." 
(#132005) - - Reverts llvm/llvm-project#120584 - - Reverting due to CI failure - https://lab.llvm.org/buildbot/#/builders/157/builds/22946 - -commit ee8a759bfb4772dea7459f4ecbd83bc2be5ee68b -Author: swatheesh-mcw -Date: Wed Mar 19 16:19:17 2025 +0530 - - [flang][openmp] Adds Parser and Semantic Support for Interop Construct, and Init and Use Clauses. (#120584) - - Adds Parser and Semantic Support for the below construct and clauses: - - Interop Construct - - Init Clause - - Use Clause - - Note: - The other clauses supported by Interop Construct such as Destroy, Use, - Depend and Device are added already. - -commit e7c6e3557b372afe6e78c025addfec276a10b49d -Author: Tom Eccles -Date: Wed Mar 19 10:12:52 2025 +0000 - - [flang][OpenMP] Fix threadprivate pointer variable in common block (#131888) - - Fixes #112538 - - The problem was that the host associated symbol for the threadprivate - variable doesn't have all of the symbol attributes (e.g. POINTER). This - caused the lowering code to generate the wrong type, eventually hitting - an assertion. - -commit cbc5c11feca0a65a7731de8d6eb14fddf2f233aa -Author: Akash Banerjee -Date: Tue Mar 18 13:17:10 2025 +0000 - - [MLIR][OpenMP] Add Lowering support for implicitly linking to default declare mappers (#131006) - -commit 83658ddb1b58fa10cf5f8ac8dfbe794b7a3701bc -Author: Kareem Ergawy -Date: Tue Mar 18 14:07:41 2025 +0100 - - [flang][OpenMP] Enable delayed privatization by default for `omp.distribute` (#131574) - - Switches delayed privatization for `omp.distribute` to be on by default: - controlled by the `-openmp-enable-delayed-privatization` instead of by - `-openmp-enable-delayed-privatization-staging`. 
- - ### GFortran & Fujitsu test suite results: - - #### gfotran test-suite (this PR): - ``` - Testing Time: 34.51s - Passed: 6569 - ``` - - #### Fujitsu without changes (commit: 0813c5cf5f52): - ``` - Testing Time: 155.39s - Passed : 88325 - Failed : 156 - Executable Missing: 408 - ``` - - #### Fujitsu with changes (this PR): - ``` - Testing Time: 158.54s - Passed : 88325 - Failed : 156 - Executable Missing: 408 - ``` - -commit 3ff3b29dd62436e34d7e6551398c3a09fb590c07 -Author: jeanPerier -Date: Fri Mar 14 10:51:46 2025 +0100 - - [flang] lower remaining cases of pointer assignments inside forall (#130772) - - Implement handling of `NULL()` RHS, polymorphic pointers, as well as - lower bounds or bounds remapping in pointer assignment inside FORALL. - - These cases eventually do not require updating hlfir.region_assign, - lowering can simply prepare the new descriptor for the LHS inside the - RHS region. - - Looking more closely at the polymorphic cases, there is not need to call - the runtime, fir.rebox and fir.embox do handle the dynamic type setting - correctly. - - After this patch, the last remaining TODO is the allocatable assignment - inside FORALL, which like some cases here, is more likely an accidental - feature given FORALL was deprecated in F2003 at the same time than - allocatable components where added. - -commit f4fc2d731c1b351d5f684f7ec53a0e1ca549df43 -Author: Krzysztof Parzyszek -Date: Wed Mar 12 19:41:11 2025 -0500 - - [flang][OpenMP] Map ByRef if size/alignment exceed that of a pointer (#130832) - - Improve the check for whether a type can be passed by copy. Currently, - passing by copy is done via the OMP_MAP_LITERAL mapping, which can only - transfer as much data as can be contained in a pointer representation. 
- -commit 29f5d5bea92f937d4e2fea7fdd16036fff528adf -Author: Leandro Lupori -Date: Tue Mar 11 09:38:40 2025 -0300 - - [flang][OpenMP] Fix privatization of procedure pointers (#130336) - - Fixes #121720 - -commit d67947162f4b06172fac91fefa6a9ad25eb6dd56 -Author: Krzysztof Parzyszek -Date: Mon Mar 10 08:11:01 2025 -0500 - - [flang][OpenMP] Implement HAS_DEVICE_ADDR clause (#128568) - - The HAS_DEVICE_ADDR indicates that the object(s) listed exists at an - address that is a valid device address. Specifically, - `has_device_addr(x)` means that (in C/C++ terms) `&x` is a device - address. - - When entering a target region, `x` does not need to be allocated on the - device, or have its contents copied over (in the absence of additional - mapping clauses). Passing its address verbatim to the region for use is - sufficient, and is the intended goal of the clause. - - Some Fortran objects use descriptors in their in-memory representation. - If `x` had a descriptor, both the descriptor and the contents of `x` - would be located in the device memory. However, the descriptors are - managed by the compiler, and can be regenerated at various points as - needed. The address of the effective descriptor may change, hence it's - not safe to pass the address of the descriptor to the target region. - Instead, the descriptor itself is always copied, but for objects like - `x`, no further mapping takes place (as this keeps the storage pointer - in the descriptor unchanged). - - --------- - - Co-authored-by: Sergio Afonso - -commit 40e245a9aac02e0bbb6b44287bc13c80a68d37b3 -Author: jeanPerier -Date: Fri Mar 7 10:28:02 2025 +0100 - - [flang] add support for procedure pointer assignment inside FORALL (#130114) - - Very similar to object pointer assignment, the difference is the SSA - types of the LHS (!fir.ref()>> and RHS - (!fir.boxproc<()->()). - - The RHS must be saved as simple address, not descriptors (it is not - possible to make CFI descriptor out of procedure entity). 
- -commit 9543e9e9270e01f2c7311b571246c6ea105bcdb0 -Author: Kareem Ergawy -Date: Fri Mar 7 05:44:39 2025 +0100 - - [flang][OpenMP] Handle pre-detemined `lastprivate` for `simd` (#129507) - - This PR tries to fix `lastprivate` update issues in composite - constructs. In particular, pre-determined `lastprivate` symbols are - attached to the wrong leaf of the composite construct (the outermost - one). When using delayed privatization (should be the default mode in - the future), this results in trying to update the `lastprivate` symbol - in the wrong construct (outside the `omp.loop_nest` op). - - For example, given the following input: - ```fortran - !$omp target teams distribute parallel do simd collapse(2) private(y_max) - do i=x_min,x_max - do j=y_min,y_max - enddo - enddo - ``` - - Without the fixes introduced in this PR, the `DataSharingProcessor` - tries to generate the `lastprivate` update ops in the `parallel` op - since this is the op for which the DSP instance is created. - - The fix consists of 2 main parts: - 1. Instead of creating a single DSP instance, one instance is created - for the leaf constructs that might need privatization (whether for - explicit, implicit, or pre-determined symbols). - 2. When generating the `lastprivate` comparison ops, we don't directly - use the SSA values of the UBs and steps. Instead, we regenerated these - SSA values from the original loop bounds' expressions. We have to do - this to avoid using `host_eval` values in the `lastprivate` comparison - logic which is illegal. - -commit 478e5161406a781afc41e15bf942fb5df6672067 -Author: Valentin Clement (バレンタイン クレメン) -Date: Thu Mar 6 19:19:51 2025 -0800 - - [flang][cuda] Sync double descriptor after c_f_pointer call (#130194) - - After a global device pointer is set through `c_f_pointer`, we need to - sync the double descriptor so the version on the device is also up to - date. 
- -commit e2911aa2c254c071c2ab898a69c69ba8c94586f8 -Author: Kiran Chandramohan -Date: Thu Mar 6 12:19:34 2025 +0000 - - [Flang][OpenMP] Fix crash when loop index var is pointer or allocatable (#129717) - - Use hlfir dereferencing for pointers and allocatables and use hlfir - assign. Also, change the code updating IV in lastprivate. - - Note: This is a small change. Modifications in existing tests are - changes from fir.store to hlfir.assign. - - Fixes #121290 - -commit d1abbb4dc5071e379d048f98b096260ed899ae44 -Author: Zhen Wang <37195552+wangzpgi@users.noreply.github.com> -Date: Wed Mar 5 14:50:42 2025 -0800 - - [flang][cuda] Change induction variable from i32 to index for doconcurrent inside cuf kernel directive (#129924) - - Use `index` instead of `i32` for induction variables for doconcurrent - inside cuf kernel directive. Regular do loop inside cuf kernel directive - also uses `index`: - ``` - cuf.kernel<<<*, *>>> (%arg0 : index) = ... - ``` - -commit 7302e1b94edb2de459a72b3e452d4f3be2d795eb -Author: jeanPerier -Date: Wed Mar 5 11:24:04 2025 +0100 - - [flang] implement simple pointer assignments inside FORALL (#129522) - - The semantic of pointer assignments inside FORALL requires evaluating - the targets (RHS) and pointer variables (LHS) of all iterations before - evaluating the assignments. - - In practice, if the compiler can prove that the RHS and LHS evaluations - are not impacted by the assignments, the evaluation of the FORALL - assignment statement can be done in a single loop. However, if the - compiler cannot prove this, it needs to "save" the addresses of the - targets and/or the pointer descriptors of each iterations before doing - the assignments. - - This patch implements the most common cases where there is no lower bound - spec, no bounds remapping, the LHS is not polymorphic, and the RHS is - not NULL. 
- - The HLFIR operation used to represent assignments inside FORALL can be - used for pointer assignments to (the only difference being that the LHS - is a descriptor address). - - The analysis for intrinsic assignment can be reused, with the - distinction that the RHS data is not read during the assignment. - - The logic that is used to save LHS in intrinsic assignments inside - FORALL is extracted to be used for the RHS of pointer assignments when - needed (saving a descriptor value). - Pointer assignment LHS are just descriptor addresses and are saved as - int_ptr values. - -commit 9573c621147748e5ca07f545db0d995708c29435 -Author: Krzysztof Parzyszek -Date: Mon Mar 3 07:59:19 2025 -0600 - - [flang][OpenMP] Accept modern syntax of FLUSH construct (#128975) - - The syntax with the object list following the memory-order clause has - been removed in OpenMP 5.2. Still, accept that syntax with versions >= - 5.2, but treat it as deprecated (and emit a warning). - -commit d1fd3698a9b755250f622fd1b14c57a27e2a9d77 -Author: Valentin Clement (バレンタイン クレメン) -Date: Sun Mar 2 16:12:01 2025 -0800 - - [flang][cuda] Allow unsupported data transfer to be done on the host (#129160) - - Some data transfer marked as unsupported can actually be deferred to an - assignment on the host when the variables involved are unified or - managed. - -commit 24b7759a9dfe5714236957e7d829e2412100a4b7 -Author: Mats Petersson -Date: Tue Feb 25 17:36:25 2025 +0000 - - [FLANG][OpenMP]Add frontend support for ASSUME and ASSUMES (#120770) - - Enough suport to parse correctly formed directives of !$OMP ASSUME and - !$OMP ASSUMES with teh related clauses that go with them: ABSENT, - CONTAINS, NO_OPENPP, NO_OPENMP_ROUTINES, NO_PARALLELISM and HOLDS. - - Tests added for unparsing and dump parse-tree. - - Semantics support is very minimal and no specific tests added. - - The lowering will hit a TODO, and there are tests in Lower/OpenMP/Todo - to make it clear that this is currently expected behaviour. 
- - --------- - - Co-authored-by: Kiran Chandramohan - Co-authored-by: Krzysztof Parzyszek - -commit 25c19eb1178a26b09e8ee58c825d4ed0260b70da -Author: Sergio Afonso -Date: Tue Feb 25 10:35:21 2025 +0000 - - [Flang][OpenMP] Allow host evaluation of loop bounds for distribute (#127822) - - This patch adds `target teams distribute [simd]` and equivalent - construct nests to the list of cases where loop bounds can be evaluated - in the host, as they represent kernels for which the trip count must - also be evaluated in advance to the kernel call. - -commit a67566b185c56ce84f6b858e431e4d412b40fdaa -Author: Zhen Wang <37195552+wangzpgi@users.noreply.github.com> -Date: Thu Feb 20 14:05:44 2025 -0800 - - Allow do concurrent inside cuf kernel directive (#127693) - - Allow do concurrent inside cuf kernel directive to avoid the following - Lowering error: - ``` - void {anonymous}::FirConverter::genFIR(const Fortran::parser::CUFKernelDoConstruct&): Assertion `bounds && "Expected bounds on the loop construct"' failed. - ``` - - --------- - - Co-authored-by: Valentin Clement (バレンタイン クレメン) - -commit d6c6bde9dbcf332b5092ebcee8c7fe6fbb5aa2ae -Author: Jean-Didier PAILLEUX -Date: Wed Feb 19 16:00:09 2025 +0100 - - [flang] Implement !DIR$ UNROLL_AND_JAM [N] (#125046) - - This patch implements support for the UNROLL_AND_JAM directive to enable - or disable unrolling and jamming on a `DO LOOP`. - It must be placed immediately before a `DO LOOP` and applies only to the - loop that follows. N is an integer that specifying the unrolling factor. - This is done by adding an attribute to the branch into the loop in LLVM - to indicate that the loop should unrolled and jammed. - -commit 9905728e2fb4ebe9b7518dfd73a0574eea0a2083 -Author: Akash Banerjee -Date: Tue Feb 18 16:36:01 2025 +0000 - - [MLIR][OpenMP] Add Lowering support for OpenMP Declare Mapper directive (#117046) - - This patch adds HLFIR/FIR lowering support for OpenMP Declare Mapper - directive. - Depends on #117045. 
- -commit 6b52fb25b90e575b507343bde0162d3d652ff666 -Author: Asher Mancinelli -Date: Mon Feb 10 08:21:22 2025 -0800 - - [flang] Correctly handle `!dir$ unroll` with unrolling factors of 0 and 1 (#126170) - - https://github.com/llvm/llvm-project/pull/123331 added support for the - unrolling directive. In the presence of an explicit unrolling factor, - that unrolling factor would be unconditionally passed into the metadata - even when it was 1 or 0. These special cases should instead disable - unrolling. Adding an explicit unrolling factor of 0 triggered this - assertion which is fixed by this patch: - - ``` - unsigned int unrollCountPragmaValue(const llvm::Loop*): - Assertion `Count >= 1 && "Unroll count must be positive."' failed. - ``` - - Updated tests and documentation. - -commit dcb124e820b2bf9dda60f66151591155a385580e -Author: Kareem Ergawy -Date: Thu Feb 6 19:11:04 2025 +0100 - - [flang][OpenMP] Enable delayed privatization by default `omp.wsloop` (#125732) - - Reapplies #122471 - - This is based on https://github.com/llvm/llvm-project/pull/125699, only - the latest commit is relevant. - - With changes in this PR and the parent one, the previously reported - failures in the Fujitsu(*) test suite should hopefully be resolved (I - verified all the 14 reported failures and they pass now). 
- - (*) https://linaro.atlassian.net/browse/LLVM-1521 - -commit b815a3942a0b0a9e7aab6b269ffdb0e93abc4368 -Author: Michael Kruse -Date: Thu Feb 6 15:29:10 2025 +0100 - - [Flang] Move non-common headers to FortranSupport (#124416) - - Move non-common files from FortranCommon to FortranSupport (analogous to - LLVMSupport) such that - - * declarations and definitions that are only used by the Flang compiler, - but not by the runtime, are moved to FortranSupport - - * declarations and definitions that are used by both ("common"), the - compiler and the runtime, remain in FortranCommon - - * generic STL-like/ADT/utility classes and algorithms remain in - FortranCommon - - This allows a for cleaner separation between compiler and runtime - components, which are compiled differently. For instance, runtime - sources must not use STL's `` which causes problems with CUDA - support. Instead, the surrogate header `flang/Common/optional.h` must be - used. This PR fixes this for `fast-int-sel.h`. - - Declarations in include/Runtime are also used by both, but are - header-only. `ISO_Fortran_binding_wrapper.h`, a header used by compiler - and runtime, is also moved into FortranCommon. - -commit ccd92ec4c6ceb09e75ed40c96c1da7d03b9c45d5 -Author: Anchu Rajendran S -Date: Wed Feb 5 06:55:32 2025 -0800 - - [flang][openmp] Changes for invoking scan Op (#123254) - -commit 6fc66d322b00bdabc27fe8e14b27ab9bd53ba770 -Author: Leandro Lupori -Date: Tue Feb 4 10:28:14 2025 -0300 - - [flang][OpenMP] Fix sections lastprivate for common blocks (#125504) - - Common block handling was missing in sections' lastprivate lowering. - - Fixes #121719 - -commit 6dfe20dbbd65e2945350ed9a93eb383131c49511 -Author: Krzysztof Parzyszek -Date: Mon Feb 3 11:13:44 2025 -0600 - - [flang][OpenMP] Parse METADIRECTIVE in specification part (#123397) - - Add METADIRECTIVE to the OpenMP declarative constructs as well. Emit a - TODO error for both declarative and executable cases. 
- -commit 15ab7be2e049bc0f4ea6744ca037395686a923bc -Author: Krzysztof Parzyszek -Date: Wed Jan 29 15:07:20 2025 -0600 - - [flang][OpenMP] Parse WHEN, OTHERWISE, MATCH clauses plus METADIRECTIVE (#121817) - - Parse METADIRECTIVE as a standalone executable directive at the moment. - This will allow testing the parser code. - - There is no lowering, not even clause conversion yet. There is also no - verification of the allowed values for trait sets, trait properties. - -commit e811cb00e533e9737db689e35ee6cb0d5af536cc -Author: Jean-Didier PAILLEUX -Date: Wed Jan 29 09:44:09 2025 +0100 - - [flang] Implement !DIR$ UNROLL [N] (#123331) - - This patch implements support for the UNROLL directive to control how - many times a loop should be unrolled. - It must be placed immediately before a `DO LOOP` and applies only to the - loop that follows. N is an integer that specifying the unrolling factor. - This is done by adding an attribute to the branch into the loop in LLVM - to indicate that the loop should unrolled. - The code pushed to support the directive `VECTOR ALWAYS` has been - modified to take account of the fact that several directives can be used - before a `DO LOOP`. - -commit 654b76321a602db4d68734e9fd11efbb7d8eb617 -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue Jan 28 20:57:33 2025 -0800 - - [flang][cuda] Allow to set the stack limit size (#124859) - - This patch adds a call to the CUFInit function just after `ProgramStart` - when CUDA Fortran is enabled to initialize the CUDA context. This allows - us to set up some context information like the stack limit that can be - defined by an environment variable `ACC_OFFLOAD_STACKSIZE=`. - -commit 8035d38daab028b8da3cf2b01090b5f0ceacd695 -Author: Mats Petersson -Date: Sun Jan 26 09:44:04 2025 +0000 - - [Flang][OpenMP]Add parsing support for DISPATCH construct (#121982) - - This allows the Flang parser to accept the !$OMP DISPATCH and related - clauses. - - Lowering is currently not implemented. 
Tests for unparse and parse-tree - dump is provided, and one for checking that the lowering ends in a "not - yet implemented" - - --------- - - Co-authored-by: Kiran Chandramohan - -commit daa18205c6f0a3b5dd62ba2e65948e1a9182a60f -Author: Kaviya Rajendiran <67495422+kaviya2510@users.noreply.github.com> -Date: Thu Jan 23 11:14:00 2025 +0530 - - [Flang][OpenMP] Fix copyin allocatable lowering to MLIR (#122097) - - Fixes https://github.com/llvm/llvm-project/issues/113191 - - Issue: [flang][OpenMP] Runtime segfault when an allocatable variable is - used with copyin - - Rootcause: The value of the threadprivate variable is not being copied - from the primary thread to the other threads within a parallel region. - As a result it tries to access a null pointer inside a parallel region - which causes segfault. - - Fix: When allocatables used with copyin clause need to ensure that, on - entry to any parallel region each thread’s copy of a variable will - acquire the allocation status of the primary thread, before copying the - value of a threadprivate variable of the primary thread to the - threadprivate variable of each other member of the team. - -commit 937cbce14c9aa956342a9c818c26a8a557802843 -Author: Kareem Ergawy -Date: Wed Jan 22 10:16:40 2025 +0100 - - Revert "[flang][OpenMP] Enable delayed privatization by default `omp.wsloop` (#122471)" (#123324) - - This seems to have caused some regressions in Fujitsu's test-suite: - https://linaro.atlassian.net/browse/LLVM-1521 - - This reverts commit 6f82408bb53f57a859953d8f1114f1634a5d3ee9. 
- -commit 662133a278f4f3553f061f7999759bae4e842820 -Author: jeanPerier -Date: Tue Jan 21 20:32:42 2025 +0100 - - [flang][OpenMP][OpenACC] remove libEvaluate dependency in passes (#123784) - - Move OpenACC/OpenMP helpers from Lower/DirectivesCommon.h that are also - used in OpenACC and OpenMP mlir passes into a new - Optimizer/Builder/DirectivesCommon.h so that parser and evaluate headers - are not included in Optimizer libraries (this both introduce - compile-time and link-time pointless overheads). - - This should fix https://github.com/llvm/llvm-project/issues/123377 - -commit c2aa11d148679b7d49cdff3819d5c8bdbd807777 -Author: Thirumalai Shaktivel <74826228+Thirumalai-Shaktivel@users.noreply.github.com> -Date: Tue Jan 21 09:10:25 2025 +0530 - - [Flang] Add LLVM lowering support for UNTIED clause in Task (#121052) - - Implementation details: - The UNTIED clause is recognized by setting the flag=0 for the default - case or performing logical OR to flag if other clauses are specified, - and this flag is passed as an argument to the `__kmpc_omp_task_alloc` - runtime call. - - - Resubmitting the PR with fix for the failure, as it was reverted here: - 927a70daf31b1610627f346b0dc140eda72144b9 - and previously merged here: https://github.com/llvm/llvm-project/pull/115283 - -commit a0406ce823e8f1c1993b565d08b045c0104c3a5a -Author: Kareem Ergawy -Date: Thu Jan 16 19:10:12 2025 +0100 - - [flang][OpenMP] Add `hostIsSource` paramemter to `copyHostAssociateVar` (#123162) - - This fixes a bug when the same variable is used in `firstprivate` and - `lastprivate` clauses on the same construct. The issue boils down to the - fact that `copyHostAssociateVar` was deciding the direction of the copy - assignment (i.e. the `lhs` and `rhs`) based on whether the - `copyAssignIP` - parameter is set. This is not the best way to do it since it is not - related to whether we doing a copy from host to localized copy or the - other way around. 
When we set the insertion for `firstprivate` in - delayed privatization, this resulted in switching the direction of the - copy assignment. Instead, this PR adds a new paramter to explicitely - tell - the function the direction of the assignment. - - This is a follow up PR for - https://github.com/llvm/llvm-project/pull/122471, only the latest commit - is relevant. - -commit 6f82408bb53f57a859953d8f1114f1634a5d3ee9 -Author: Kareem Ergawy -Date: Thu Jan 16 15:44:59 2025 +0100 - - [flang][OpenMP] Enable delayed privatization by default `omp.wsloop` (#122471) - - This enable delayed privatization by default for `omp.wsloop` ops, with - one caveat! I had to workaround the "impure" alloc region issue that - being resolved at the moment. The workaround detects whether the alloc - region's argument is used in the region and at the same time defined in - block that does not dominate the chosen alloca insertion point. If so, - we move the alloca insertion point below the defining instruction of the - alloc region argument. This basically reverts to the - non-delayed-privatizaiton behavior. - -commit 0d150817c354bc61a48676754288aabbb03570c3 -Author: Kazu Hirata -Date: Tue Jan 14 11:08:53 2025 -0800 - - [flang] Fix a warning - - This patch fixes: - - flang/lib/Lower/OpenMP/OpenMP.cpp:599:15: error: unused variable - 'ompEval' [-Werror,-Wunused-variable] - -commit 8fe11a26ae8f12622ddec83a7b80637080843a8b -Author: Sergio Afonso -Date: Tue Jan 14 13:55:17 2025 +0000 - - [Flang][OpenMP] Lowering of host-evaluated clauses (#116219) - - This patch adds support for lowering OpenMP clauses and expressions - attached to constructs nested inside of a target region that need to be - evaluated in the host device. This is done through the use of the - `OpenMP_HostEvalClause` `omp.target` set of operands and entry block - arguments. 
- - When lowering clauses for a target construct, a more involved - `processHostEvalClauses()` function is called, which looks at the - current and potentially other nested constructs in order to find and - lower clauses that need to be processed outside of the `omp.target` - operation under construction. This populates an instance of a global - structure with the resulting MLIR values. - - The resulting list of host-evaluated values is used to initialize the - `host_eval` operands when constructing the `omp.target` operation, and - then replaced with the corresponding block arguments after creating that - operation's region. - - Afterwards, while lowering nested operations, those that might - potentially be evaluated on the host (i.e. `num_teams`, `thread_limit`, - `num_threads` and `collapse`) check first whether there is an active - global host-evaluated information structure and whether it holds values - referring to these clauses. If that is the case, the stored values - (`omp.target` entry block arguments at that stage) are used instead of - lowering these clauses again. - -commit 82b9eb1086d45caf74ff3d5dfa519631c247eb14 -Author: Sergio Afonso -Date: Mon Jan 13 12:31:29 2025 +0000 - - [Flang][OpenMP] Support teams reductions lowering (#122683) - - This patch adds PFT to MLIR lowering of teams reductions. Since there is - still no MLIR to LLVM IR translation implemented, compilation of - programs including these constructs will still trigger - not-yet-implemented errors. - -commit 42da12063f49e8d52e63dcb36d25b55ed3688a26 -Author: Kareem Ergawy -Date: Sun Jan 12 07:46:58 2025 +0100 - - [flang][OpenMP] Extend delayed privatization for `omp.simd` (#122156) - - Adds support for delayed privatization for `simd` directives. This PR - includes PFT down to LLVM IR lowering. 
- -commit d82d53b2e3d7fb2f44f91dc1ca9ce8bb5487da57 -Author: jeanPerier -Date: Tue Jan 7 10:04:27 2025 +0100 - - [flang][openmp] initialize allocatable components of firstprivate copies (#121808) - - Descriptors of allocatable components of firstprivate derived type - copies need to be set-up. Otherwise the program later die when - manipulating them inside OpenMP region. - -commit 9165848c8285884938583f5c3a35c97ec03ee486 -Author: Valentin Clement (バレンタイン クレメン) -Date: Fri Jan 3 14:37:14 2025 -0800 - - [flang][cuda] Sync global descriptor when nullifying pointer (#121595) - -commit 5137c209f0c19668d06e48cc4293e4c01a77c964 -Author: agozillon -Date: Fri Jan 3 16:46:15 2025 +0100 - - [Flang][OpenMP] Fix allocating arrays with size intrinisic (#119226) - - Attempt to address the following example from causing an assert or ICE: - - ``` - subroutine test(a) - implicit none - integer :: i - real(kind=real64), dimension(:) :: a - real(kind=real64), dimension(size(a, 1)) :: b - - !$omp target map(tofrom: b) - do i = 1, 10 - b(i) = i - end do - !$omp end target - end subroutine - ``` - - Where we utilise a Fortran intrinsic (size) to calculate the size of - allocatable arrays and then map it to device. - -commit adeff9f63a24f60b0bf240bf13e40bbf7c1dd0e8 -Author: Krzysztof Parzyszek -Date: Fri Jan 3 09:21:36 2025 -0600 - - [flang][OpenMP] Allow utility constructs in specification part (#121509) - - Allow utility constructs (error and nothing) to appear in the - specification part as well as the execution part. The exception is - "ERROR AT(EXECUTION)" which should only be in the execution part. - In case of ambiguity (the boundary between the specification and the - execution part), utility constructs will be parsed as belonging to the - specification part. In such cases move them to the execution part in the - OpenMP canonicalization code. 
- -commit df859f90aab261918eee26382021e8455b532f7d -Author: Krzysztof Parzyszek -Date: Fri Jan 3 08:36:34 2025 -0600 - - [flang][OpenMP] Frontend support for NOTHING directive (#120606) - - Create OpenMPUtilityConstruct and put the two utility directives in it - (error and nothing). Rename OpenMPErrorConstruct to OmpErrorDirective. - -commit c870632ef6162fbdccaad8cd09420728220ad344 -Author: Matthias Springer -Date: Wed Dec 25 09:42:03 2024 +0100 - - [flang] Fix some memory leaks (#121050) - - This commit fixes some but not all memory leaks in Flang. There are - still 91 tests that fail with ASAN. - - - Use `mlir::OwningOpRef` instead of `std::unique_ptr`. The latter does - not free allocations of nested blocks. - - Pass `ModuleOp` as value instead of reference. - - Add few missing deallocations in test cases and other places. - -commit 927a70daf31b1610627f346b0dc140eda72144b9 -Author: Muhammad Omair Javaid -Date: Tue Dec 24 01:47:24 2024 +0500 - - Revert "[Flang OpenMP] Add LLVM translation support for UNTIED in Task (#115283)" - - This reverts commit 919aead1db64b2f1444842bc75a3af7836238671. - It breaks following LLVM bots: - https://lab.llvm.org/buildbot/#/builders/199 - https://lab.llvm.org/buildbot/#/builders/143 - https://lab.llvm.org/buildbot/#/builders/17 - -commit 919aead1db64b2f1444842bc75a3af7836238671 -Author: Thirumalai Shaktivel <74826228+Thirumalai-Shaktivel@users.noreply.github.com> -Date: Fri Dec 20 16:36:51 2024 +0530 - - [Flang OpenMP] Add LLVM translation support for UNTIED in Task (#115283) - - Implementation details: - The UNTIED clause is recognized by setting the flag=0 for the default - case or performing logical OR to flag if other clauses are specified, - and this flag is passed as an argument to the `__kmpc_omp_task_alloc` - runtime call. 
- -commit 1fcb6a9754a8db057e18f629cb90011b638901e7 -Author: Leandro Lupori -Date: Thu Dec 19 17:26:50 2024 -0300 - - [flang][OpenMP] Initialize allocatable members of derived types (#120295) - - Allocatable members of privatized derived types must be allocated, - with the same bounds as the original object, whenever that member - is also allocated in it, but Flang was not performing such - initialization. - - The `Initialize` runtime function can't perform this task unless - its signature is changed to receive an additional parameter, the - original object, that is needed to find out which allocatable - members, with their bounds, must also be allocated in the clone. - As `Initialize` is used not only for privatization, sometimes this - other object won't even exist, so this new parameter would need - to be optional. - Because of this, it seemed better to add a new runtime function: - `InitializeClone`. - To avoid unnecessary calls, lowering inserts a call to it only for - privatized items that are derived types with allocatable members. - - Fixes https://github.com/llvm/llvm-project/issues/114888 - Fixes https://github.com/llvm/llvm-project/issues/114889 - -commit fc97d2e68b03bc2979395e84b645e5b3ba35aecd -Author: Peter Klausler -Date: Wed Dec 18 07:02:37 2024 -0800 - - [flang] Add UNSIGNED (#113504) - - Implement the UNSIGNED extension type and operations under control of a - language feature flag (-funsigned). - - This is nearly identical to the UNSIGNED feature that has been available - in Sun Fortran for years, and now implemented in GNU Fortran for - gfortran 15, and proposed for ISO standardization in J3/24-116.txt. - - See the new documentation for details; but in short, this is C's - unsigned type, with guaranteed modular arithmetic for +, -, and *, and - the related transformational intrinsic functions SUM & al. 
- -commit e532241b021cd48bad303721757c1194bc844775 -Author: Kareem Ergawy -Date: Wed Dec 18 09:19:45 2024 +0100 - - Re-apply (#117867): [flang][OpenMP] Implicitly map allocatable record fields (#120374) - - This re-applies #117867 with a small fix that hopefully prevents build - bot failures. The fix is avoiding `dyn_cast` for the result of - `getOperation()`. Instead we can assign the result to `mlir::ModuleOp` - directly since the type of the operation is known statically (`OpT` in - `OperationPass`). - -commit dc936f3c199374056d3aaf3a0434b9efd807fc6c -Author: Kareem Ergawy -Date: Wed Dec 18 06:52:24 2024 +0100 - - Revert "[flang][OpenMP] Implicitly map allocatable record fields (#117867)" (#120360) - -commit db09014a0747931026e31f40c4f541d110a5298c -Author: Kareem Ergawy -Date: Wed Dec 18 05:37:58 2024 +0100 - - [flang][OpenMP] Implicitly map allocatable record fields (#117867) - - This is a starting PR to implicitly map allocatable record fields. - - This PR contains the following changes: - 1. Re-purposes some of the utils used in `Lower/OpenMP.cpp` so that - these utils work on the `mlir::Value` level rather than the - `semantics::Symbol` level. This takes one step towards to enabling - MLIR passes to more easily do some lowering themselves (e.g. creating - `omp.map.bounds` ops for implicitely caputured data like this PR - does). - 2. Adds support for implicitely capturing and mapping allocatable fields - in record types. - - There is quite some distant to still cover to have full support for - this. I added a number of todos to guide further development. - - Co-authored-by: Andrew Gozillon - - Co-authored-by: Andrew Gozillon - -commit 9d33874936d83b8ddf5d028d313d810214f00f20 -Author: Slava Zakharin -Date: Tue Dec 17 09:06:05 2024 -0800 - - [flang] Support -f[no-]realloc-lhs. (#120165) - - -frealloc-lhs is the default. 
- If -fno-realloc-lhs is specified, then an allocatable on the left - side of an intrinsic assignment is not implicitly (re)allocated - to conform with the right hand side. Fortran runtime will issue - an error if there is a mismatch in shape/type/allocation-status. - -commit 75e6d0eb4d6ad1b58e5eb5c4d25371e6062cee44 -Author: Mats Petersson -Date: Fri Dec 13 14:05:48 2024 +0000 - - [flang][OpenMP]Add support for OpenMP ERROR directive (#119582) - - Lowering leads to a TODO, with a test to confirm. - - Also testing unparse. - - --------- - - Co-authored-by: Krzysztof Parzyszek - -commit 7c9404c279cfa13e24a043e6357cc85bd12f55f1 -Author: Ivan R. Ivanov -Date: Fri Dec 13 21:44:43 2024 +0900 - - [flang][OpenMP] Add frontend support for ompx_bare clause (#111106) - -commit db9856b516a36c259fb17af422cd80d6ebc67406 -Author: Leandro Lupori -Date: Wed Dec 11 16:26:19 2024 -0300 - - [flang][OpenMP][NFC] Turn symTable into a reference (#119435) - - Convert `DataSharingProcessor::symTable` from pointer to reference. - This avoids accidental null pointer dereferences and makes it - possible to use `symTable` when delayed privatization is disabled. - -commit 0469bb91aa82b331052d314de53546548e6eb060 -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue Dec 10 09:48:15 2024 -0800 - - [flang][cuda] Fix lowering when step is a variable (#119421) - - Add missing conversion. - -commit edc50f3954af081b385cb03961899b5da1f1eb6b -Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> -Date: Tue Dec 10 16:55:06 2024 +0530 - - [flang][OpenMP] Add lowering support for task detach (#119128) - - This PR adds lowering task detach to MLIR. - -commit a88677edc0792534ba3157bf7d7a1b98e470f2fb -Author: Yusuke MINATO -Date: Tue Dec 10 16:26:53 2024 +0900 - - Reland "[flang] Integrate the option -flang-experimental-integer-overflow into -fno-wrapv" (#118933) - - This relands #110063. 
- The performance issue on 503.bwaves_r is found not to be related to the - patch, and is resolved by fbd89bcc when LTO is enabled. - -commit c91ba04328e1ded6f284469a7828d181324d4e30 -Author: Michael Kruse -Date: Fri Dec 6 15:29:00 2024 +0100 - - [Flang][NFC] Split runtime headers in preparation for cross-compilation. (#112188) - - Split some headers into headers for public and private declarations in - preparation for #110217. Moving the runtime-private headers in - runtime-private include directory will occur in #110298. - - * Do not use `sizeof(Descriptor)` in the compiler. The size of the - descriptor is target-dependent while `sizeof(Descriptor)` is the size of - the Descriptor for the host platform which might be too small when - cross-compiling to a different platform. Another problem is that the - emitted assembly ((cross-)compiling to the same target) is not identical - between Flang's running on different systems. Moving the declaration of - `class Descriptor` out of the included header will also reduce the - amount of #included sources. - - * Do not use `sizeof(ArrayConstructorVector)` and - `alignof(ArrayConstructorVector)` in the compiler. Same reason as with - `Descriptor`. - - * Compute the descriptor's extra flags without instantiating a - Descriptor. `Fortran::runtime::Descriptor` is defined in the runtime - source, but not the compiler source. - - * Move `InquiryKeywordHashDecode` into runtime-private header. The - function is defined in the runtime sources and trying to call it in the - compiler would lead to a link-error. - - * Move allocator-kind magic numbers into common header. They are the - only declarations out of `allocator-registry.h` in the compiler as well. - - This does not make Flang cross-compile ready yet, the main goal is to - avoid transitive header dependencies from Flang to clang-rt. There are - more assumptions that host platform is the same as the target platform. 
- -commit ff78cd5f3d6ae8e7084f0aff4df4164ff5a38af9 -Author: jeanPerier -Date: Thu Dec 5 14:09:48 2024 +0100 - - [flang] fix private pointers and default initialized variables (#118494) - - Both OpenMP privatization and DO CONCURRENT LOCAL lowering was incorrect - for pointers and derived type with default initialization. - - For pointers, the descriptor was not established with the rank/type - code/element size, leading to undefined behavior if any inquiry was made - to it prior to a pointer assignment (and if/when using the runtime for - pointer assignments, the descriptor must have been established). - - For derived type with default initialization, the copies were not - default initialized. - -commit 6003be7ef14bd95647e1ea6ec9685c1310f8ce58 -Author: vdonaldson <37090318+vdonaldson@users.noreply.github.com> -Date: Wed Dec 4 16:21:11 2024 -0500 - - [flang] IEEE_GET_UNDERFLOW_MODE, IEEE_SET_UNDERFLOW_MODE (#118551) - - Implement IEEE_GET_UNDERFLOW_MODE and IEEE_SET_UNDERFLOW_MODE. Update - IEEE_SUPPORT_UNDERFLOW_CONTROL to enable support for indvidual REAL - kinds. - -commit 03b5f8f0f0d10c412842ed04b90e2217cf071218 -Author: Mats Petersson -Date: Mon Dec 2 15:05:21 2024 +0000 - - [flang][OpenMP]Add parsing and semantics support for ATOMIC COMPARE (#117032) - - This adds a minimalistic implementation of parsing and semantics for the - ATOMIC COMPARE feature from OpenMP 5.1. - - There is no lowering, just a TODO for that part. Some of the Semantics - is also just a comment explaining that more is needed. - -commit 94488445cdd1657d1363a4994393b193c291b2cc -Author: Kareem Ergawy -Date: Mon Dec 2 15:01:09 2024 +0100 - - [flang][MLIR] Support delayed privatization for `wsloop` (PFT -> MLIR) (#118271) - - Adds PFT to MLIR lowering for delayed privatization of `omp.wsloop` ops. - Lowering to LLVM IR will be added in a later PR. 
- -commit 81f544d4659a96772c7e2ffed1bbe557993f4b34 -Author: Kareem Ergawy -Date: Thu Nov 28 05:15:06 2024 +0100 - - [flang][OpenMP] Rewrite `omp.loop` to semantically equivalent ops (#115443) - - Introduces a new conversion pass that rewrites `omp.loop` ops to their - semantically equivalent op nests based on the surrounding/binding - context of the `loop` op. Not all forms of `omp.loop` are supported yet. - See `isLoopConversionSupported` for more info on which forms are - supported. - -commit e573c6b67eb729a625431121139100bebc61ba1f -Author: Yusuke MINATO -Date: Thu Nov 28 08:58:09 2024 +0900 - - [flang] Add nsw to DO loop parameters (#113854) - - nsw is added to DO loop parameters (initial parameters, terminal - parameters, and incrementation parameters). - This can help vectorization in some cases like #110609. - - See also the discussion in - https://discourse.llvm.org/t/rfc-add-nsw-flags-to-arithmetic-integer-operations-using-the-option-fno-wrapv/77584/20. - -commit 89b31c9c32f2cd2c038fe2b12d9c66a53c779fc1 -Author: Kiran Chandramohan -Date: Wed Nov 27 14:20:34 2024 +0000 - - [Flang][OpenMP] Fix a crash for declare target in an interface (#117709) - - This is a point fix for the crash in #116426. Leaving the bug open to - further explore declare target issues for interfaces. - -commit 3433e4140d18865fe784061a3cd029c5980f4e2f -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue Nov 26 17:04:00 2024 -0800 - - [flang][cuda] Detect constant on the rhs of data transfer (#117806) - - When the rhs expression has some constants and a device symbol, an - implicit data transfer needs to be generated for the device symbol and - the computation with the constant is done on the host.
- -commit b9e3a769b99e9dafa3e5205dbbef9fae8573e4e2 -Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> -Date: Tue Nov 26 16:10:26 2024 +0530 - - [flang][mlir][llvm][OpenMP] Add lowering and translation support for mergeable clause on task (#114662) - - Add FIR generation and LLVMIR translation support for mergeable clause - on task construct. If mergeable clause is present on a task, the - relevant flag in `ompt_task_flag_t` is set and passed to - `__kmpc_omp_task_alloc`. - -commit bb8bf858e865ec3119352bdef43c09adb4c93b31 -Author: jeanPerier -Date: Tue Nov 26 09:21:13 2024 +0100 - - [flang] add internal_assoc flag to mark variable captured in internal procedure (#117161) - - This patch adds a flag to mark hlfir.declare of host variables that are - captured in some internal procedure. - - It enables implementing a simple fir.call handling in - fir::AliasAnalysis::getModRef leveraging Fortran language specifications - and without a data flow analysis. - - This will allow implementing an optimization for "array = - array_function()" where array storage is passed directly into the hidden - result argument to "array_function" when it can be proven that - array_function does not reference "array". - - Captured host variables are very tricky because they may be accessed - indirectly in any calls if the internal procedure address was captured - via some global procedure pointer. Without flagging them, there is no - way around doing a complex inter procedural data flow analysis: - - checking that the call is not made to an internal procedure is not - enough because of the possibility of indirect calls made to internal - procedures inside the callee. - - checking that the current func.func has no internal procedure is not - enough because this would be invalid with inlining when a procedure - with internal procedures is inlined inside a procedure without internal - procedure. - -commit 7d6713db600af1b4381149a0e794cbce99ca6cb2 -Author: Ivan R.
Ivanov -Date: Tue Nov 19 16:58:30 2024 +0900 - - [flang][omp] Emit omp.workshare in frontend (#101444) - - Emit the contents of OpenMP workshare constructs in `omp.workshare`. - -commit 4c4a4134d5c0a0f9476b157862d378a7e571e9f0 -Author: Krzysztof Parzyszek -Date: Mon Nov 18 07:04:10 2024 -0600 - - [flang][OpenMP] Update frontend support for DEFAULTMAP clause (#116506) - - Add ALL variable category, implement semantic checks to verify the - validity of the clause, improve error messages, add testcases. - - The variable category modifier is optional since 5.0, make sure we allow - it to be missing. If it is missing, assume "all" in clause conversion. - -commit b4c0ef18226b7d1f82d71fc0171b99caec0d8d12 -Author: Kareem Ergawy -Date: Mon Nov 18 08:18:47 2024 +0100 - - [flang][OpenMP] Add MLIR lowering for `loop ... bind` (#114219) - - Extends MLIR lowering support for the `loop` directive by adding - lowering support for the `bind` clause. - - Parent PR: https://github.com/llvm/llvm-project/pull/114199, only the - latest commit is relevant to this PR. - -commit fd3ff2007ab30c74772572798f3e494fdaac7ac2 -Author: Kareem Ergawy -Date: Mon Nov 18 06:23:27 2024 +0100 - - [flang][OpenMP] Add basic support to lower `loop` directive to MLIR (#114199) - - Adds initial support for lowering the `loop` directive to MLIR. - - The PR includes basic support and testing for the following clauses: - * `collapse` - * `order` - * `private` - * `reduction` - - Parent PR: #113911, only the latest commit is relevant to this PR. - -commit e508bacce45d4fb2ba07d02c55391b858000c3b3 -Author: agozillon -Date: Sat Nov 16 12:28:37 2024 +0100 - - [Flang][OpenMP] Derived type explicit allocatable member mapping (#113557) - - This PR is one of 3 in a PR stack, this is the primary change set which - seeks to extend the current derived type explicit member mapping support - to handle descriptor member mapping at arbitrary levels of nesting.
The - PR stack seems to do this reasonably (from testing so far) but as you - can create quite complex mappings with derived types (in particular when - adding allocatable derived types or arrays of allocatable derived types) - I imagine there will be hiccups, which I am more than happy to address. - There will also be further extensions to this work to handle the - implicit auto-magical mapping of descriptor members in derived types and - a few other changes planned for the future (with some ideas on - optimizing things). - - The changes in this PR primarily occur in the OpenMP lowering and the - OMPMapInfoFinalization pass. - - In the OpenMP lowering several utility functions were added or extended - to support the generation of appropriate intermediate member mappings - which are currently required when the parent (or multiple parents) of a - mapped member are descriptor types. We need to map the entirety of these - types or do a "deep copy" for lack of a better term, where we map both - the base address and the descriptor as without the copying of both of - these we lack the information in the case of the descriptor to access - the member or attach the pointers data to the pointer and in the latter - case we require the base address to map the chunk of data. Currently we - do not segment descriptor based derived types as we do with regular - non-descriptor derived types, we effectively map their entirety in all - cases at the moment, I hope to address this at some point in the future - as it adds a fair bit of a performance penalty to having nestings of - allocatable derived types as an example. The process of mapping all - intermediate descriptor members in a members path only occurs if a - member has an allocatable or object parent in its symbol path or the - member itself is a member or allocatable. 
This occurs in the - createParentSymAndGenIntermediateMaps function, which will also generate - the appropriate address for the allocatable member within the derived - type to use as a the varPtr field of the map (for intermediate - allocatable maps and final allocatable mappings). In this case it's - necessary as we can't utilise the usual Fortran::lower functionality - such as gatherDataOperandAddrAndBounds without causing issues later in - the lowering due to extra allocas being spawned which seem to affect the - pointer attachment (at least this is my current assumption, it results - in memory access errors on the device due to incorrect map information - generation). This is similar to why we do not use the MLIR value - generated for this and utilise the original symbol provided when mapping - descriptor types external to derived types. Hopefully this can be - rectified in the future so this function can be simplified and more - closely aligned to the other type mappings. We also make use of - fir::CoordinateOp as opposed to the HLFIR version as the HLFIR version - doesn't support the appropriate lowering to FIR necessary at the moment, - we also cannot use a single CoordinateOp (similarly to a single GEP) as - when we index through a descriptor operation (BoxType) we encounter - issues later in the lowering, however in either case we need access to - intermediate descriptors so individual CoordinateOp's aid this - (although, being able to compress them into a smaller amount of - CoordinateOp's may simplify the IR and perhaps result in a better end - product, something to consider for the future). - - The other large change area was in the OMPMapInfoFinalization pass, - where the pass had to be extended to support the expansion of box types - (or multiple nestings of box types) within derived types, or box type - derived types. 
This requires expanding each BoxType mapping from one - into two maps and then modifying all of the existing member indices of - the overarching parent mapping to account for the addition of these new - members alongside adjusting the existing member indices to support the - addition of these new maps which extend the original member indices (as - a base address of a box type is currently considered a member of the box - type at a position of 0 as when lowered to LLVM-IR it's a pointer - contained at this position in the descriptor type, however, this means - extending mapped children of this expanded descriptor type to - additionally incorporate the new member index in the correct location in - its own index list). I believe there is a reasonable amount of comments - that should aid in understanding this better, alongside the test - alterations for the pass. - - A subset of the changes were also aimed at making some of the utilities - for packing and unpacking the DenseIntElementsAttr containing the member - indices shareable across the lowering and OMPMapInfoFinalization, this - required moving some functions to the Lower/Support/Utils.h header, and - transforming the lowering structure containing the member index data - into something more similar to the version used in - OMPMapInfoFinalization. There were also some other attempts at tidying - things up in relation to the member index data generation in the - lowering, some of which required creating a logical operator for the - OpenMP ID class so it can be utilised as a map key (it simply utilises - the symbol address for the moment as ordering isn't particularly - important). - - Otherwise I have added a set of new tests encompassing some of the - mappings currently supported by this PR (unfortunately as you can have - arbitrary nestings of all shapes and types it's not very feasible to - cover them all).
- -commit e67e09a77ea1e4802c0f6bc0409c9f5e9d1fae9a -Author: Anchu Rajendran S -Date: Fri Nov 15 09:10:36 2024 -0800 - - [Flang][OpenMP][Sema] Adding parsing and semantic support for scan directive. (#102792) - -commit ff7fca7fa8646d73f884ab8a351e4178499c4d05 -Author: khaki3 <47756807+khaki3@users.noreply.github.com> -Date: Fri Nov 15 08:44:42 2024 -0800 - - [flang][cuda] Support memory cleanup at a return statement (#116304) - - We generate `cuf.free` and `func.return` twice if a return statement - exists at the end of program. - - ```f90 - program test - integer, device :: a(10) - return - end - ``` - - ``` - % flang -x cuda test.cuf -mmlir --mlir-print-ir-after-all - error: loc("/path/to/test.cuf":3:3): 'func.return' op must be the last operation in the parent block - // -----// IR Dump After Fortran::lower::VerifierPass Failed () //----- // - ``` - - Dumped IR: - ```mlir - "func.func"() <{function_type = () -> (), sym_name = "_QQmain"}> ({ - ... - "cuf.free"(%5#1) <{data_attr = #cuf.cuda}> : (!fir.ref>) -> () - "func.return"() : () -> () - "cuf.free"(%5#1) <{data_attr = #cuf.cuda}> : (!fir.ref>) -> () - "func.return"() : () -> () - } - ... - ``` - - The routine `genExitRoutine` in `Bridge.cpp` is guarded by - `blockIsUnterminated()` to make sure that `func.return` is generated - only at the end of a block. However, we redundantly run - `bridge.fctCtx().finalizeAndKeep()` before `genExitRoutine` in this - case, resulting in two pairs of `cuf.free` and `func.return`. This PR - fixes `Bridge.cpp` by using `blockIsUnterminated()` to guard - `finalizeAndKeep` as well. - -commit ec1e0c5ecd53e415b23d5bd40b8e44e3ef4b4d92 -Author: Mats Petersson -Date: Thu Nov 14 09:35:34 2024 +0000 - - [Flang][OMP]Add support for DECLARE MAPPER parsing and semantics (#115160) - - Will hit a TODO in the lowering, which there are tests added to check - for this happening. 
- -commit 37143fe27e082b478d333ca28f6f1af5210b7c6b -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue Nov 12 16:49:44 2024 -0800 - - [flang][cuda] Make launch configuration optional for cuf kernel (#115947) - -commit 8f9dbb0a780feed60416ebc6ef8e89f4b0c2dca7 -Author: Tom Eccles -Date: Mon Nov 11 11:18:21 2024 +0000 - - [flang][OpenMP] delayed privatisation lowering for TASK (#113591) - -commit 90764582872bc4bd9613646b347b49c60ce2bc72 -Author: Sergio Afonso -Date: Mon Nov 4 10:32:48 2024 +0000 - - Revert "[Flang][OpenMP] Disable lowering of omp.simd reductions in co… (#113683) - - …mposites (#112686)" - - Lowering of reductions in composite operations can now be re-enabled, - since previous commits in this PR stack fix the MLIR representation - produced and it no longer triggers a compiler crash during translation - to LLVM IR. - - This reverts commit c44860c8d2582abd88794267b4fa0fa953bbef80. - -commit 6c28530ed082204a1b6d20b45482e81d4cd5ead4 -Author: Sergio Afonso -Date: Thu Oct 31 16:39:53 2024 +0000 - - [Flang][OpenMP] Properly bind arguments of composite operations (#113682) - - When composite constructs are lowered, clauses for each leaf construct - are lowered before creating the set of loop wrapper operations, using - these outside values to populate their operand lists. Then, when the - loop nest associated to that composite construct is lowered, the binding - of Fortran symbols to the entry block arguments defined by these loop - wrappers is performed, resulting in the creation of `hlfir.declare` - operations in the entry block of the `omp.loop_nest`. - - This approach prevents `hlfir.declare` operations related to the binding - and other operations resulting from the evaluation of the clauses from - being inserted between loop wrapper operations, which would be an - illegal MLIR representation. 
However, this introduces the problem of - entry block arguments defined by a wrapper that then should be used by - one of its nested wrappers, because the corresponding Fortran symbol - would still be mapped to an outside value at the time of gathering the - list of operands for the nested wrapper. - - This patch adds operand re-mapping logic to update wrappers without - changing when clauses are evaluated or where the `hlfir.declare` - creation is performed. - -commit 06984825061f1bf7c70087833a8d4f6d9feb2865 -Author: Kareem Ergawy -Date: Thu Oct 31 09:19:18 2024 +0100 - - [flang][MLIR] Hoist `do concurrent` nest bounds/steps outside the nest (#114020) - - If you have the following multi-range `do concurrent` loop: - - ```fortran - do concurrent(i=1:n, j=1:bar(n*m, n/m)) - a(i) = n - end do - ``` - - Currently, flang generates the following IR: - - ```mlir - fir.do_loop %arg1 = %42 to %44 step %c1 unordered { - ... - %53:3 = hlfir.associate %49 {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) - %54:3 = hlfir.associate %52 {adapt.valuebyref} : (i32) -> (!fir.ref, !fir.ref, i1) - %55 = fir.call @_QFPbar(%53#1, %54#1) fastmath : (!fir.ref, !fir.ref) -> i32 - hlfir.end_associate %53#1, %53#2 : !fir.ref, i1 - hlfir.end_associate %54#1, %54#2 : !fir.ref, i1 - %56 = fir.convert %55 : (i32) -> index - ... - fir.do_loop %arg2 = %46 to %56 step %c1_4 unordered { - ... - } - } - ``` - - However, if `bar` is impure, then we have a direct violation of the - standard: - - ``` - C1143 A reference to an impure procedure shall not appear within a DO CONCURRENT construct. - ``` - - Moreover, the standard describes the execution of `do concurrent` - construct in multiple stages: - - ``` - 11.1.7.4 Execution of a DO construct - ... - 11.1.7.4.2 DO CONCURRENT loop control - The concurrent-limit and concurrent-step expressions in the concurrent-control-list are evaluated. ... - - 11.1.7.4.3 The execution cycle - ... 
- The block of a DO CONCURRENT construct is executed for every active combination of the index-name values. - Each execution of the block is an iteration. The executions may occur in any order. - ``` - - From the above 2 points, it seems to me that execution is divided in - multiple consecutive stages: 11.1.7.4.2 is the stage where we evaluate - all control expressions including the step and then 11.1.7.4.3 is the - stage to execute the block of the concurrent loop itself using the - combination of possible iteration values. - -commit c478aab684be007ac14e51565c0d4ae39293d208 -Author: Krzysztof Parzyszek -Date: Wed Oct 30 08:36:08 2024 -0500 - - [flang][OpenMP] Parser support for DEPOBJ plus DEPEND, DESTROY, UPDATE (#114074) - - Parse the DEPOBJ construct and the associated clauses, perform basic - semantic checks. - -commit 55e4e3ff653356a9079906e209099684723caa4c -Author: Sergio Afonso -Date: Wed Oct 30 12:07:47 2024 +0000 - - [Flang][OpenMP] Access full list of entry block syms and vars (NFC) (#113681) - - This patch adds methods to `EntryBlockArgs` to access the full list of - entry block argument-related symbols and variables, in their standard - order. This helps centralizing this logic in as few places as possible - to avoid future inconsistencies. - -commit bd6ab32e6eb642f2b0b15be8c7c2a668192f07d8 -Author: Yusuke MINATO -Date: Mon Oct 28 23:19:20 2024 +0900 - - Revert "[flang] Integrate the option -flang-experimental-integer-overflow into -fno-wrapv" (#113901) - - Reverts llvm/llvm-project#110063 due to the performance regression on - 503.bwaves_r in SPEC2017. - -commit 843c2fbe7f983c2a2059f753e4494f06fb645a9e -Author: Kiran Chandramohan -Date: Fri Oct 25 18:57:01 2024 +0100 - - Add parser+semantics support for scope construct (#113700) - - Test parsing, semantics and a couple of basic semantic checks for - block/worksharing constructs. - Add TODO message in lowering. 
- -commit 96bb375f5cedcfcc5dcd96296ba54ff933b39d4d -Author: Yusuke MINATO -Date: Fri Oct 25 15:20:23 2024 +0900 - - [flang] Integrate the option -flang-experimental-integer-overflow into -fno-wrapv (#110063) - - nsw is now added to do-variable increment when -fno-wrapv is enabled as - GFortran seems to do. - That means the option introduced by #91579 isn't necessary any more. - - Note that the feature of -flang-experimental-integer-overflow is enabled - by default. - -commit ea3534b385a713639953fb5dfd287af87b52bead -Author: Krzysztof Parzyszek -Date: Thu Oct 24 05:54:35 2024 -0500 - - [flang][OpenMP] Parse AFFINITY clause, lowering not supported yet (#113485) - - Implement parsing of the AFFINITY clause on TASK construct, conversion - from the parser class to omp::Clause. - Lowering to HLFIR is unsupported, a TODO message is displayed. - -commit c44860c8d2582abd88794267b4fa0fa953bbef80 -Author: Sergio Afonso -Date: Mon Oct 21 14:32:21 2024 +0100 - - [Flang][OpenMP] Disable lowering of omp.simd reductions in composites (#112686) - - Currently, the `omp.simd` operation is ignored during MLIR to LLVM IR - translation when it takes part in a composite construct. One consequence - of this limitation is that any entry block arguments defined by that - operation will trigger a compiler crash if they are used anywhere, as - they are not bound to an LLVM IR value. - - A previous PR introducing support for the `reduction` clause resulted in - the creation and use of entry block arguments attached to the `omp.simd` - operation, causing compiler crashes on 'do simd reduction(...)' - constructs. - - This patch disables Flang lowering of simd reductions in 'do simd' - constructs to avoid triggering these errors while translation to LLVM IR - is still incomplete. 
- -commit 15d85769f119061fbfcae6e9de43982b534ef724 -Author: Sergio Afonso -Date: Wed Oct 16 10:27:50 2024 +0100 - - [Flang][OpenMP] Support lowering of simd reductions (#112194) - - This patch enables lowering to MLIR of the reduction clause of `simd` - constructs. Lowering from MLIR to LLVM IR remains unimplemented, so at - that stage it will result in errors being emitted rather than silently - ignoring it as it is currently done. - - On composite `do simd` constructs, this lowering error will remain - untriggered, as the `omp.simd` operation in that case is currently - ignored. The MLIR representation, however, will now contain `reduction` - information. - -commit 0a17bdfc361400cb511368f2edfc68c0d11e1974 -Author: Sergio Afonso -Date: Tue Oct 15 11:28:39 2024 +0100 - - [MLIR][OpenMP] Remove terminators from loop wrappers (#112229) - - This patch simplifies the representation of OpenMP loop wrapper - operations by introducing the `NoTerminator` trait and updating - accordingly the verifier for the `LoopWrapperInterface`. - - Since loop wrappers are already limited to having exactly one region - containing exactly one block, and this block can only hold a single - `omp.loop_nest` or loop wrapper and an `omp.terminator` that does not - return any values, it makes sense to simplify the representation of loop - wrappers by removing the terminator. - - There is an extensive list of Lit tests that needed updating to remove - the `omp.terminator`s adding some noise to this patch, but actual - changes are limited to the definition of the `omp.wsloop`, `omp.simd`, - `omp.distribute` and `omp.taskloop` loop wrapper ops, Flang lowering for - those, `LoopWrapperInterface::verifyImpl()`, SCF to OpenMP conversion - and OpenMP dialect documentation. 
- -commit 839344f025fb7eff529735873f327330618b2ebb -Author: Tarun Prabhu -Date: Mon Oct 14 08:44:24 2024 -0600 - - [clang][flang][mlir] Reapply "Support -frecord-command-line option (#102975)" - - The underlying issue was caused by a file included in two different - places which resulted in duplicate definition errors when linking - individual shared libraries. This was fixed in c3201ddaeac02a2c86a38b - [#109874]. - -commit 0163ac1f53abc0a0f6e5b7e56912c1dee67e7f32 -Author: Mats Petersson -Date: Fri Oct 11 12:23:37 2024 +0100 - - [Flang][OpenMP]Add tests for TODOs and small changes to improve messages (#111562) - - The bulk of this change are new tests to check that we get a "Not yet - implemented: *some stuff here*" message when using some not yet - supported OpenMP functionality. - - For some of these cases, this also means adding additional clauses to a - filter list in OpenMP.cpp - this changes nothing [to the best of my - understanding] other than allowing the clause to get to the point where - it can be rejected in a TODO with a more clear message. One of the TODO - filters was missing the Mergeable clause, so this was also added and the - existing test updated for the new more specific error message. - - There is no functional change intended here. - -commit e71ac933716dc9a747b93b73e899e50b421ebcde -Author: Sergio Afonso -Date: Wed Oct 9 10:24:04 2024 +0100 - - [Flang][OpenMP] Properly reserve space for entry block argument lists (NFC) (#111529) - - This patch adds the size for `use_device_ptr`, which was missing. - -commit b124c04597166cc93ca791d0ad07834c85de824d -Author: Sergio Afonso -Date: Wed Oct 9 10:21:47 2024 +0100 - - [Flang][OpenMP] Remove omp.simd reduction block args (#111523) - - This patch reverts previous changes to create entry block arguments for - reduction variables attached to `simd` constructs.
- - This can't currently be done because reduction variables stored in the - corresponding clause structure are not added to the `omp.simd` operation - when created, as this is not supported yet. Adding block arguments for - non-existent reduction variables results in some tests from the Fujitsu - compiler testsuite breaking: - https://linaro.atlassian.net/browse/LLVM-1389. - -commit 88478a89cd85adcc32f2a321ef9e9906c5fdbe26 -Author: Sergio Afonso -Date: Mon Oct 7 11:26:35 2024 +0100 - - [Flang][OpenMP] Improve entry block argument creation and binding (#110267) - - The main purpose of this patch is to centralize the logic for creating - MLIR operation entry blocks and for binding them to the corresponding - symbols. This minimizes the chances of mixing arguments up for - operations having multiple entry block argument-generating clauses and - prevents divergence while binding arguments. - - Some changes implemented to this end are: - - Split into two functions the creation of the entry block, and the - binding of its arguments and the corresponding Fortran symbol. This - enabled a significant simplification of the lowering of composite - constructs, where it's no longer necessary to manually ensure the lists - of arguments and symbols refer to the same variables in the same order - and also match the expected order by the `BlockArgOpenMPOpInterface`. - - Removed redundant and error-prone passing of types and locations from - `ClauseProcessor` methods. Instead, these are obtained from the values - in the appropriate clause operands structure. This also simplifies - argument lists of several lowering functions. - - Access block arguments of already created MLIR operations through the - `BlockArgOpenMPOpInterface` instead of directly indexing the argument - list of the operation, which is not scalable as more entry block - argument-generating clauses are added to an operation. 
- - Simplified the implementation of `genParallelOp` to no longer need to - define different callbacks depending on whether delayed privatization is - enabled. - -commit 2f245875b2f71272e6d7a78b4aed5be81109e9b9 -Author: Kareem Ergawy -Date: Fri Oct 4 15:24:00 2024 +0200 - - [flang][OpenMP] Handle unstructured CF in compound loop constructs (#111111) - - Fixes a bug in handling unstructured control-flow in compound loop - constructs. The fix makes sure that unstructured CF does not get lowered - until we reach the last item of the compound construct. This way, we - avoid moving block of unstructured loops in-between the middle items of - the construct and messing (i.e. adding operations) to these block while - doing so. - -commit c4204c0b29a6721267b1bcbaeedd7b1118e42396 -Author: jeanPerier -Date: Thu Oct 3 17:10:57 2024 +0200 - - [flang] replace fir.complex usages with mlir complex (#110850) - - Core patch of - https://discourse.llvm.org/t/rfc-flang-replace-usages-of-fir-complex-by-mlir-complex-type/82292. - After that, the last step is to remove fir.complex from FIR types. - -commit f98244392b4e3d4075c03528dcec0b268ba13ab7 -Author: Krzysztof Parzyszek -Date: Wed Oct 2 15:36:45 2024 -0500 - - [flang][OpenMP] Parse lastprivate modifier, add TODO to lowering (#110568) - - Parse the lastprivate clause with a modifier. Codegen for it is not yet - implemented. - -commit d0f67773b213383b6e1c9331fb00f2d4c14bfcb2 -Author: Sergio Afonso -Date: Tue Oct 1 15:04:27 2024 +0100 - - [MLIR][OpenMP] Normalize handling of entry block arguments (#109808) - - This patch introduces a new MLIR interface for the OpenMP dialect aimed - at providing a uniform way of verifying and handling entry block - arguments defined by OpenMP clauses. - - The approach consists in defining a set of overrideable methods that - return the number of block arguments the operation holds regarding each - of the clauses that may define them. 
These by default return 0, but they - are overridden by the corresponding clause through the - `extraClassDeclaration` mechanism. - - Another set of interface methods to get the actual lists of block - arguments is defined, which is implemented based on the previously - described methods. These implicitly define a standardized ordering - between the list of block arguments associated to each clause, based on - the alphabetical ordering of their names. They should be the preferred - way of matching operation arguments and entry block arguments to that - operation's first region. - - Some updates are made to the printing/parsing of `omp.parallel` to - follow the expected order between `private` and `reduction` clauses, as - well as the MLIR to LLVM IR translation pass to access block arguments - using the new interface. Unit tests of operations impacted by additional - verification checks and sorting of entry block arguments. - -commit 497523b695d06c8bf9f3aaf5a5cb4414a5b0625b -Author: Kareem Ergawy -Date: Thu Sep 26 12:28:14 2024 +0200 - - [flang][OpenMP] Delayed privatization MLIR lowering support for `distribute` (#109632) - - Starts delayed privatization support for standalone `distribute` - directives. Other flavours of `distribute` are still TODO as well as - MLIR to LLVM IR lowering. - -commit 737c414e1d9578e5037e68e3b3f6ddea507f8243 -Author: David Spickett -Date: Fri Sep 20 11:19:12 2024 +0000 - - Revert "[clang][flang][mlir] Support -frecord-command-line option (#102975)" - - This reverts commit b3533a156da92262eb19429d8c12f53e87f5ccec.
- - It caused test failures in shared library builds: - https://lab.llvm.org/buildbot/#/builders/80/builds/3854 - -commit b3533a156da92262eb19429d8c12f53e87f5ccec -Author: Tarun Prabhu -Date: Thu Sep 19 18:28:50 2024 -0600 - - [clang][flang][mlir] Support -frecord-command-line option (#102975) - - Add support for the -frecord-command-line option that will produce the - llvm.commandline metadata which will eventually be saved in the object - file. This behavior is also supported in clang. Some refactoring of the - code in flang to handle these command line options was carried out. The - corresponding -grecord-command-line option which saves the command line - in the debug information has not yet been enabled for flang. - -commit 5aaf384b1614fcef5504d0b16d3e5063f72943c1 -Author: Tom Eccles -Date: Mon Sep 16 12:33:37 2024 +0100 - - [flang][NFC] use llvm.intr.stacksave/restore instead of opaque calls (#108562) - - The new LLVM stack save/restore intrinsic operations are more convenient - than function calls because they do not add function declarations to the - module and therefore do not block the parallelisation of passes. - Furthermore they could be much more easily marked with memory effects - than function calls if that ever proved useful. - - This builds on top of #107879. - - Resolves #108016 - -commit b54be00a29f8dabf9b0d9ec69373e859bc75ded4 -Author: Sergio Afonso -Date: Mon Sep 16 12:03:30 2024 +0100 - - [Flang][OpenMP] Process motion clauses in a single call (NFC) (#108046) - - This patch removes the template parameter of the - `ClauseProcessor::processMotionClauses()` method and instead processes - both `TO` and `FROM` as part of a single call. This also enables moving - the implementation out of the header and makes it simpler for a - follow-up patch to potentially refactor `processMap()`, - `processMotionClauses()`, `processUseDeviceAddr()` and - `processUseDevicePtr()`, and minimize code duplication among these. 
- -commit 8e10a3f80e264aaa186ab3cc74fea840f453c66d -Author: Mats Petersson -Date: Fri Sep 13 12:57:11 2024 +0100 - - [flang][OpenMP] don't privatise loop index marked shared (#108176) - - Mark the symbol with OmpShared, and then check that later in lowering to - avoid making a local loop index. - - OpenMP 5.2 says: "Loop iteration variables of loops that are not associated - with any OpenMP directive maybe listed in data-sharing attribute clauses on - the surrounding teams, parallel or taskgenerating construct, and on enclosed - constructs, subject to other restrictions." - - Tests updated to match the extra OmpShared attribute. - - Add regression test for lowering to hlfir. - - Closes #102961 - - --------- - - Co-authored-by: Tom Eccles - -commit 70ef5eb6f087524dc952a8f5249b79f4a4000e04 -Author: harishch4 -Date: Fri Sep 13 10:11:56 2024 +0530 - - [Flang][OpenMP] Lowering nontemporal clause to MLIR for SIMD directive (#108339) - - Currently, Flang throws a "**not yet implemented: Unhandled clause - NONTEMPORAL in SIMD construct**" error when encountering nontemporal - clause. This patch adds support for this clause in SIMD construct. - -commit 53b59022b07317fa01bf8601d12915dce424baf0 -Author: David Truby -Date: Tue Sep 10 14:59:21 2024 +0100 - - [flang][OpenMP] Implement copyin for pointers and allocatables. (#107425) - - The copyin clause currently forbids pointer and allocatable variables, - which are allowed by the OpenMP 1.1 and 3.0 specifications respectively. - -commit 433ca3ebbef50002bec716ef2c6d6a82db71048d -Author: Sergio Afonso -Date: Tue Sep 10 11:09:25 2024 +0100 - - [Flang][Lower] Introduce SymMapScope helper class (NFC) (#107866) - - This patch creates a simple RAII wrapper class for `SymMap` to make it - easier to use and prevent a missing matching `popScope()` for a - `pushScope()` call on simple use cases. - - Some push-pop pairs are replaced with instances of the new class by this - patch. 
- -commit 797f01198e8b41982916ba02d703bd6a96b5347e -Author: Leandro Lupori -Date: Thu Sep 5 14:55:01 2024 -0300 - - [flang][OpenMP] Make lastprivate work with reallocated variables (#106559) - - Fixes https://github.com/llvm/llvm-project/issues/100951 - -commit c81b43074ab010d01ad794224dd9dd22bbe8a1f7 -Author: Valentin Clement (バレンタイン クレメン) -Date: Wed Sep 4 08:43:13 2024 -0700 - - [flang][cuda] Fix lowering of cuf kernel with unstructured nested construct (#107149) - - Lowering was crashing when cuf kernels have an unstructured construct. - Blocks created by PFT need to be re-created inside of the operation like - it is done for OpenACC construct. - -commit 9ba41031de105d7babf3ae53facd368f2b4e409f -Author: Akash Banerjee -Date: Wed Sep 4 12:35:44 2024 +0100 - - [OpenMP]Update use_device_clause lowering (#101703) - - This patch updates the use_device_ptr and use_device_addr clauses to use - the mapInfoOps for lowering. This allows all the types that are handled - by the map clauses such as derived types to also be supported by the - use_device_clauses. - - This is patch 1/2 in a series of patches. - - Co-authored-by: Raghu Maddhipatla raghu.maddhipatla@amd.com - -commit 8586d0330e36b22496f9ba5ed116bc1aac5a1f28 -Author: vdonaldson <37090318+vdonaldson@users.noreply.github.com> -Date: Fri Aug 30 09:07:30 2024 -0400 - - [flang] Don't generate empty else blocks (#106618) - - Code lowering always generates fir.if else blocks for source level if - statements, whether needed or not. Change this to only generate else - blocks that are needed. - -commit d4c519e7b2ac21350ec08b23eda44bf4a2d3c974 -Author: Valentin Clement (バレンタイン クレメン) -Date: Thu Aug 29 22:37:20 2024 -0700 - - [flang][cuda] Do inline allocation/deallocation in device code (#106628) - - ALLOCATE and DEALLOCATE statements can be inlined in device function. - This patch updates the condition that determines whether to inline these actions - in lowering.
- - This avoid runtime calls in device function code and can speed up the - execution. - - Also move `isCudaDeviceContext` from `Bridge.cpp` so it can be used - elsewhere. - -commit 0a41c8e7a050c837c609cbcbc8342024701cd14b -Author: Valentin Clement (バレンタイン クレメン) -Date: Thu Aug 29 11:27:42 2024 -0700 - - [flang][cuda] Avoid generating cuf.data_transfer in OpenACC region (#106435) - - `cuf.data_transfer` will be converted to runtime calls to cuda runtime - api and these are not supported in device code. assignment in OpenACC - region will be handled by the OpenACC code gen so we avoid to generate - data transfer on them. - -commit 57726c440c30b3f0b5ebfaf345b0237df4430259 -Author: Sergio Afonso -Date: Thu Aug 29 11:46:23 2024 +0100 - - [Flang][OpenMP] DISTRIBUTE PARALLEL DO SIMD lowering (#106211) - - This patch adds PFT to MLIR lowering support for `distribute parallel do - simd` composite constructs. - -commit 9c8ce5fac8a05e27cca832fb3913ec986b120211 -Author: Sergio Afonso -Date: Thu Aug 29 11:45:28 2024 +0100 - - [Flang][OpenMP] DISTRIBUTE PARALLEL DO lowering (#106207) - - This patch adds PFT to MLIR lowering support for `distribute parallel - do` composite constructs. - -commit 0f206b19c3303aeb8e527b4977da2bd301464a9b -Author: Sergio Afonso -Date: Thu Aug 29 11:44:20 2024 +0100 - - [Flang][OpenMP] Move loop privatization out of dispatch (#106066) - - This patch moves the creation of `DataSharingProcessor` instances for - loop constructs out of `genOMPDispatch()` and into their corresponding - codegen functions. This is a necessary first step to enable a proper - handling of privatization on composite constructs. - - Some tests are updated due to a change of order between clause - processing and privatization. 
- -commit 60e9fb9dae0e041cb468210f5795e9d59e70cccf -Author: Sergio Afonso -Date: Thu Aug 29 10:37:00 2024 +0100 - - [Flang][OpenMP] Don't expect block arguments using early privatization (#105842) - - There are some spots where all symbols to privatize collected by a - `DataSharingProcessor` instance are expected to have corresponding entry - block arguments associated regardless of whether delayed privatization - was enabled. - - This can result in compiler crashes if a `DataSharingProcessor` instance - created with `useDelayedPrivatization=false` is queried in this way. The - solution proposed by this patch is to provide another public method to - query specifically delayed privatization symbols, which will either be - empty or point to the complete set of symbols to privatize accordingly. - -commit ccbee7116b1d55ab578632635dbf5a7352bbdace -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue Aug 27 17:36:31 2024 -0700 - - [flang][cuda] Use declare op results instead of memref (#106287) - - #106120 Simplify the data transfer when possible by using the reference - and a shape. This bypass the declare op. In order to keep the declare op - around, use the second results of the declare op which achieve the same. - -commit 900cd627582349381bcc0ee74054ca4d9efb55df -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue Aug 27 10:03:15 2024 -0700 - - [flang][cuda] Simplify data transfer when possible (#106120) - - When possible, avoid using descriptors and use the reference and the - shape for data_transfer. - -commit 7af61d5cf464f1d716c82bc77907fa3fe4ebc841 -Author: Valentin Clement (バレンタイン クレメン) -Date: Mon Aug 26 09:50:17 2024 -0700 - - [flang][cuda] Add shape to cuf.data_transfer operation (#104631) - - When doing data transfer with dynamic sized array, we are currently - generating a data transfer between two descriptors. If the shape values - can be provided, we can keep the data transfer between two references. - This patch adds the shape operands to the operation. 
- - This will be exploited in lowering in a follow up patch. - -commit f4cf93fb509c53771d61a973f27be9b1a90dee0a -Author: agozillon -Date: Fri Aug 23 19:48:43 2024 +0200 - - [Flang][OpenMP] Align map clause generation and fix issue with non-shared allocations for assumed shape/size descriptor types (#97855) - - This PR aims to unify the map argument generation behavior across both - the implicit capture (captured in a target region) and the explicit - capture (process map), currently the varPtr field of the MapInfo for the - same variable will be different depending on how it's captured. This PR - tries to align that across the generations of MapInfoOp in the OpenMP - lowering. - - Currently, I have opted to utilise the rawInput (input memref to a HLFIR - DeclareInfoOp) as opposed to the addr field which includes more - information. The side effect of this is that we have to deal with - BoxTypes less often, which will result in simpler maps in these cases. - The negative side effect of this is that we don't have access to the - bounds information through the resulting value, however, I believe the - bounds information we require in our case is still appropriately stored - in the map bounds, and this seems to be the case from testing so far. - - The other fix is for cases where we end up with a BoxType argument into - a function (certain assumed shape and sizes cases do this) that has no - fir.ref wrapping it. As we need the Box to be a reference type to - actually utilise the operation to access the base address stored inside - and create the correct mappings we currently generate an intermediate - allocation in these cases, and then store into it, and utilise this as - the map argument, as opposed to the original. - - However, as we were not sharing the same intermediate allocation across - all of the maps for a variable, this resulted in errors in certain cases - when detaching/attaching the data e.g. via enter and exit.
This PR - adjusts this for cases - - Currently we only maintain tracking of all intermediate allocations for - the current function scope, as opposed to module. Primarily as the only - case I am aware of that this is required is in cases where we pass - certain types of arguments to functions (so I opted to minimize the - overhead of the pass for now). It could likely be extended to module - scope if required if we find other cases where it's applicable and - causing issues. - -commit aa875cfe11ddec239934e37ce07c1cf7804bb73b -Author: Sergio Afonso -Date: Tue Aug 20 11:09:54 2024 +0100 - - [Flang][OpenMP] Prevent re-composition of composite constructs (#102613) - - After decomposition of OpenMP compound constructs and assignment of - applicable clauses to each leaf construct, composite constructs are then - combined again into a single element in the construct queue. This helped - later lowering stages easily identify composite constructs. - - However, as a result of the re-composition stage, the same list of - clauses is used to produce all MLIR operations corresponding to each - leaf of the original composite construct. This undoes existing logic - introducing implicit clauses and deciding to which leaf construct(s) - each clause applies. - - This patch removes construct re-composition logic and updates Flang - lowering to be able to identify composite constructs from a list of leaf - constructs. As a result, the right set of clauses is produced for each - operation representing a leaf of a composite construct. - - PR stack: - - #102612 - - #102613 - -commit 3a3990cb05858e7892a4825c677891a980f1cea8 -Author: Sergio Afonso -Date: Thu Aug 15 10:15:26 2024 +0100 - - [Flang][OpenMP] Move assert for wrapper syms and block args to genLoopNestOp (#103731) - - This patch adds an assert to `genLoopNestClauses` to ensure the number - of symbols and corresponding loop wrapper entry block arguments have the - same size. 
This is checked by some of the callers, but it makes more - sense moving it into the function itself and avoid having to replicate - it. - -commit b8b82756cb6a8ee71ef1d8f76542458bffdc1538 -Author: Sergio Afonso -Date: Wed Aug 14 10:03:30 2024 +0100 - - [Flang][OpenMP][Lower] Clause lowering cleanup (#103058) - - This patch removes the `ClauseProcessor::processDefault` method due to - it having been implemented in - `DataSharingProcessor::collectDefaultSymbols` instead. - - Also, some `genXyzClauses` functions are updated to avoid triggering - TODO errors for clauses not supported by the corresponding construct and - to keep alphabetical sorting on the order in which clauses are - processed. - -commit 3c5509d9ad25ee49aa68ab0c60d73d9587635b62 -Author: Kazu Hirata -Date: Mon Aug 12 22:53:28 2024 -0700 - - [flang] Use llvm::is_contained (NFC) (#102999) - -commit 90aac06c7f49dd275a49b843b5fd91cb00d549b4 -Author: Tarun Prabhu -Date: Mon Aug 12 11:56:19 2024 -0600 - - [flang][mlir] Add llvm.ident metadata when compiling with flang - - This brings the behavior of flang in line with clang which also adds - this metadata unconditionally. - - Co-authored-by: Tarun Prabhu - -commit f2f41937f31e643471e4e37ef9d7c4eda806adc8 -Author: Akash Banerjee -Date: Mon Aug 12 15:36:25 2024 +0100 - - [OpenMP][MLIR] Set omp.composite attr for composite loop wrappers and add verifier checks (#102341) - - This patch sets the omp.composite unit attr for composite wrapper ops - and also add appropriate checks to the verifiers of supported ops for - the presence/absence of the attribute. - - This is patch 2/2 in a series of patches. Patch 1 - #102340. 
- -commit ebf530c4e98f09366865dd8c98fff88467e7db72 -Author: Sergio Afonso -Date: Mon Aug 12 10:44:22 2024 +0100 - - [Flang][OpenMP] NFC: Use ConstructQueue::const_iterator (#102612) - - This patch replaces `ConstructQueue::iterator` arguments with - `ConstructQueue::const_iterator` where it's used as a pointer to an - element inside of a `const ConstructQueue &` passed along with it. - - Since these functions don't intend to modify the list or any elements in - it, keeping constness consistent between both makes it simpler to work - with. - -commit 10df3207434e603be5f7e9b3036d821dd5623d3a -Author: Kareem Ergawy -Date: Fri Aug 2 09:46:34 2024 +0200 - - [flang][OpenMP] Enable delayed privatization for `omp parallel` by default (#90945) - - Flips the delayed privatization switch to be on by default. After the - recent fixes related to delayed privatization, the gfortran test suite - runs successfully with delayed privatization turned on by default for - `omp parallel`. - -commit fdfeea5bd6763277b5078e33e17e1bfc521a6cba -Author: Sergio Afonso -Date: Mon Jul 29 10:56:45 2024 +0100 - - [MLIR][OpenMP][Flang] Normalize clause arguments names (#99505) - - Currently, there are some inconsistencies to how clause arguments are - named in the OpenMP dialect. Additionally, the clause operand structures - associated to them also diverge in certain cases. The purpose of this - patch is to normalize argument names across all `OpenMP_Clause` tablegen - definitions and clause operand structures. - - This has the benefit of providing more consistent representations for - clauses in the dialect, but the main short-term advantage is that it - enables the development of an OpenMP-specific tablegen backend to - automatically generate the clause operand structures without breaking - dependent code. - - The main re-naming decisions made in this patch are the following: - - Variadic arguments (i.e. multiple values) have the "_vars" suffix.
- This and other similar suffixes are removed from array attribute - arguments. - - Individual required or optional value arguments do not have any suffix - added to them (e.g. "val", "var", "expr", ...), except for `if` which - would otherwise result in an invalid C++ variable name. - - The associated clause's name is prepended to argument names that don't - already contain it as part of its name. This avoids future collisions - between arguments named the same way on different clauses and adding - both clauses to the same operation. - - Privatization and reduction related arguments that contain lists of - symbols pointing to privatizer/reducer operations use the "_syms" - suffix. This removes the inconsistencies between the names for - "copyprivate_funcs", "[in]reductions", "privatizers", etc. - - General improvements to names, replacement of camel case for snake - case everywhere, etc. - - Renaming of operation-associated operand structures to use the - "Operands" suffix in place of "ClauseOps", to better differentiate - between clause operand structures and operation operand structures. - - Fields on clause operand structures are sorted according to the - tablegen definition of the same clause. - - The assembly format for a few arguments is updated to better reflect the - clause they are associated with: - - `chunk_size` -> `dist_schedule_chunk_size` - - `grain_size` -> `grainsize` - - `simd` -> `par_level_simd` - -commit 68a0d0c76223736351fd7c452bca3ba9d80ca342 -Author: Kareem Ergawy -Date: Wed Jul 24 13:48:47 2024 +0200 - - [flang][OpenMP] Handle common blocks in delayed privatization (#100317) - - Adds proper mapping of common block elements to block arguments in - parallel regions when delayed privatization is enabled. 
- -commit 0ee0eeb4bb9be6aeef6c84121ca1af463840fb6a -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue Jul 23 09:49:17 2024 -0700 - - [flang] Enhance location information (#95862) - - Add inclusion location information by using FusedLocation with - attribute. - - More context here: - https://discourse.llvm.org/t/rfc-enhancing-location-information/79650 - -commit 4b9fab591916eec9fd1942f37afe3b137b564089 -Author: David Truby -Date: Fri Jul 19 15:55:36 2024 +0100 - - [flang][OpenMP] Implement lastprivate with collapse (#99500) - - This patch enables the lastprivate clause to be used in the presence of - the collapse clause. - - Note: the way we currently implement lastprivate means that this adds a - large number of compare instructions to the end of every iteration of - the loop. This is a clearly non-optimal thing to do, but lastprivate in - general will need re-implementing to prevent this. This is planned as - part of the delayed privatization work. This current implementation is - just a stop-gap measure as generating sub-optimal but working code is - better than crashing out. - -commit 3ad7108c3cf843cac6301db3f73ccea9661bc4d3 -Author: Valentin Clement (バレンタイン クレメン) -Date: Wed Jul 17 08:39:18 2024 -0700 - - [flang][cuda] Avoid temporary when RHS is a logical constant (#99078) - - Enhance the detection of constant on the RHS for logical cases so we - don't create a temporary. - -commit f1d3fe7aae7867b5de96b84d6d26b5c9f02f209a -Author: Alexis Perry-Holby -Date: Tue Jul 16 09:48:24 2024 -0600 - - Add basic -mtune support (#98517) - - Initial implementation for the -mtune flag in Flang. - - This PR is a clean version of PR #96688, which is a re-land of PR #95043 - -commit e34e739ba88fed7450d232e29b523d247af365ec -Author: Anchu Rajendran S -Date: Fri Jul 12 10:14:38 2024 -0700 - - Adding Changes for invoking Masked Operation (#98423) - - PR adds changes to the flang frontend to create the `MaskedOp` when - `masked` directive is used in the input program. 
Omp masked is - introduced in 5.2 standard and allows a parallel region to be executed - by threads specified by a programmer. This is achieved with the help of - filter clause which helps to specify thread id expected to execute the - region. - - Other related PRs: - - [Fortran Parsing and Semantic - Support](https://github.com/llvm/llvm-project/pull/91432) - Merged - - [MLIR Support](https://github.com/llvm/llvm-project/pull/96022/files) - - Merged - - [Lowering Support](https://github.com/llvm/llvm-project/pull/98401) - - Under Review - -commit de90391ea88c51da8bcde95206f3f31ecbaf97a3 -Author: Tom Eccles -Date: Fri Jul 12 10:29:21 2024 +0100 - - [flang][OpenMP] Lower REDUCTION clause for SECTIONS (#97858) - - The tricky bit here is that we need to generate the reduction symbol - mapping inside each of the nested SECTION constructs. This is a bit - similar to omp.canonical_loop inside of omp.wsloop, except the SECTION - constructs come from the PFT. - - To make this work I moved the lowering of the SECTION constructs inside - of the lowering SECTIONS (where reduction information is still - available). This subverts the normal control flow for OpenMP lowering a - bit. - - One alternative option I investigated would be to generate the SECTION - CONSTRUCTS as normal as though there were no reduction, and then to fix - them up after control returns back to genSectionsOp. The problem here is - that the code generated for the section body has the wrong symbol - mapping for the reduction variable, so all of the nested code has to be - patched up. In my prototype version this was even more hacky than what - the solution I settled upon. 
- -commit 9b6504e98359f5d14fdaa353b2789e7e95239f96 -Author: Valentin Clement (バレンタイン クレメン) -Date: Thu Jul 11 17:15:54 2024 -0700 - - [flang][cuda] Make sure to issue freemem for the allocated temp (#98078) - - When implicit data transfer is created, make sure we generate the - `freemem` op on the `allocmem` result value and not the declare op - value. - -commit bd7b16217bbac4b1e1a25c7bf9566db715ca9b10 -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue Jul 9 10:13:00 2024 -0700 - - [flang][cuda] Add conversion for stream value in cuf kernel directive (#98082) - - The stream value is defined as an i32 value in the operation. Add a - conversion so the declared integer can be different and an i32 value. - -commit 918ac62916d48649f224f8c54837d25baff97a08 -Author: Sergio Afonso -Date: Tue Jul 9 11:17:21 2024 +0100 - - [Flang][OpenMP] Add lowering support for DISTRIBUTE SIMD (#97819) - - This patch adds support for lowering 'DISTRIBUTE SIMD' constructs to - MLIR. Translation of `omp.distribute` operations to LLVM IR is still not - supported, so its composition with `omp.simd` isn't either. - -commit 52d2d8200807357a582e089eaa95692b2c77da2e -Author: Sergio Afonso -Date: Tue Jul 9 11:15:35 2024 +0100 - - [Flang][OpenMP] Add lowering support for DO SIMD (#97718) - - This patch adds support for lowering 'DO SIMD' constructs to MLIR. SIMD - information is now stored in an `omp.simd` loop wrapper, which is - currently ignored by the OpenMP dialect to LLVM IR translation stage. - - The end result is that runtime behavior of compiled 'DO SIMD' constructs - does not change after this patch, so 'DO SIMD' still runs like 'DO' - (i.e. SIMD width = 1). However, all of the required information is now - present in the resulting MLIR representation. 
- - To avoid confusion, the previous wsloop-simd.f90 lit test is renamed to - wsloop-schedule.f90 and a new wsloop-simd.f90 test is created to check - the addition of SIMD clauses to the `omp.simd` operation produced when a - 'DO SIMD' construct is lowered to MLIR. - -commit 2b56005fd91de3adba1957e70575d5461329178b -Author: Sergio Afonso -Date: Tue Jul 9 10:32:20 2024 +0100 - - [Flang][OpenMP] Refactor loop-related lowering for composite support (#97566) - - This patch splits the lowering for `omp.loop_nest` into its own function - and updates lowering for all supported loop wrappers to stop creating - this operation themselves. - - Lowering functions for loop constructs are split into "wrapper" and - "standalone" variants, where the "wrapper" version only creates the - specific operation with nothing inside of it and the "standalone" - version calls the former and also handles clause processing and creates - the nested `omp.loop_nest`. - - "Wrapper" lowering functions can be used by "composite" lowering - functions in follow-up patches, minimizing code duplication. - - Tests broken as a result of reordering between the processing of the - loop wrapper's and the nested `omp.loop_nest`'s clauses are also - updated. - -commit 720b958953d1dd34c8e3a52588ab26c52edfc2a6 -Author: Sergio Afonso -Date: Fri Jul 5 10:38:03 2024 +0100 - - [Flang][OpenMP] NFC: Share DataSharingProcessor creation logic for all loop directives (#97565) - - This patch moves the logic associated with the creation of a - `DataSharingProcessor` instance for loop-associated OpenMP leaf - constructs to the `genOMPDispatch` function, avoiding code duplication - for standalone and composite loop constructs. - - This also prevents privatization-related allocations to be later made - inside of loop wrappers when support for composite constructs is - implemented. 
- -commit 2d0c4c363b4b39014b469c30234cf06894e06d6a -Author: Sergio Afonso -Date: Fri Jul 5 10:13:30 2024 +0100 - - [Flang][OpenMP] Remove unused OpWithBodyGenInfo attributes (#97572) - - This patch removes the `outerCombined`, `reductionSymbols` and - `reductionTypes` attributes from the `OpWithBodyGenInfo` structure and - their uses, as they never impact the lowering process or its output. - - The `outerCombined` variable is always set to `false`, so in practice it - doesn't represent what its name indicates. Furthermore, initializing it - correctly can result in privatization not being performed in cases where - it should (at least tests doing this together with composite construct - support pointed me in that direction). It seems to be tied to the early - privatization approach, where a redundant alloca could possibly be - avoided in certain cases. With the transition to delayed privatization, - it seems like it won't serve that purpose anymore, since the decision of - what and where privatization-related allocations are inserted will be - postponed to the MLIR to LLVM IR translation stage. Since this feature - is already currently not being used, its potential benefit appears to be - minor and it won't make sense to do once the delayed privatization - approach is rolled out, I propose removing it. - - The `reductionSymbols` and `reductionTypes` variables are set in certain - cases but never used. Unless there's a plan where these will be needed, - in which case it would be a better alternative to document it, I believe - we should also remove them. - -commit 817f0d9d3099a25ea83fcf633071ccaaeac09954 -Author: Sergio Afonso -Date: Thu Jul 4 15:31:20 2024 +0100 - - [Flang][OpenMP] NFC: Remove unused argument for omp.target lowering (#97564) - - This patch removes the `outerCombined` argument from `genTargetOp()` and - the `processReduction` argument from `genTargetClauses()`, as they - aren't used. 
- -commit 03579455bd941da6278f883ed8827ef0fbeb5e50 -Author: agozillon -Date: Wed Jul 3 07:07:53 2024 +0200 - - [Flang][OpenMP] More elegantly handle declare target in unnamed program (#95834) - - This PR is related to the following issue: - - https://github.com/llvm/llvm-project/issues/63362 - - It tries to solve the crash (which is now slightly different, since the - issue has been languishing for a while sorry about that I missed the - original issue ping). - - The crash occurs due to trying to access the symbol of an - undefined/unnamed main when trying to find a declare target symbol that - has not been specified (but can be assumed based on it's residence in a - function or interface). - - The solution in this PR will check if we're trying to retrieve a main - symbol, and then if that is the case, we make sure it exists (due to - being named) before we attempt to retrieve it, this avoids the crash. - - However, that's only part of the issue in the above example, the other - is the significant amount of nested directives, I think we are still a - little while away from handling this, I have added a reduced variation - of the test in the issue as a replicator which contains a lesser number - of nesting directives. To push the issue along further, it will likely - be a case of working through a number of variations of nested directives - in conjunction with target + parallel. - - However, this PR pushes the issue above to the point where the issue - encountered is identical to the following: - https://github.com/llvm/llvm-project/issues/67231 - -commit 66d5ca2a3d8df780951ce2987157ef03e73393c7 -Author: jeanPerier -Date: Tue Jul 2 15:19:49 2024 +0200 - - Reland "[flang] add extra component information in fir.type_info" (#97404) - - Reland #96746 with the proper Support/CMakelist.txt change. - - fir.type does not contain all Fortran level information about - components. For instance, component lower bounds and default initial - value are lost. 
For correctness purpose, this does not matter because - this information is "applied" in lowering (e.g., when addressing the - components, the lower bounds are reflected in the hlfir.designate). - - However, this "loss" of information will prevent the generation of - correct debug info for the type (needs to know about lower bounds). The - initial value could help building some optimization pass to get rid of - initialization runtime calls. - - This patch adds lower bound and initial value information into - fir.type_info via a new fir.dt_component operation. This operation is - generated only for component that needs it, which helps keeping the IR - small for "boring" types. - - In general, adding Fortran level info in fir.type_info will allow - delaying the generation of "type descriptors" globals that are very - verbose in FIR and make it hard to work with FIR dumps from applications - with many derived types. - -commit 29cdc8f9ca58411992d3fa5afd89e0628df24679 -Author: Leandro Lupori -Date: Mon Jul 1 14:10:35 2024 -0300 - - [flang][OpenMP] Fix nested privatization of allocatable (#96968) - - In nested constructs where a given variable is privatized more than - once, using the default clause, the innermost host association symbol - will point to the previous host association symbol. - Such symbol lacks the allocatable attribute and can't be used to - generate the type of the symbol to be cloned. Use the ultimate - symbol instead. - - Fixes #85594, #80398 - -commit 03d9a317725cdc03a0558eb49ff53bcc5c45dd08 -Author: Sergio Afonso -Date: Mon Jul 1 11:08:14 2024 +0100 - - [Flang][OpenMP] Update flang with changes to the OpenMP dialect (#92524) - - This patch applies fixes after the updates to OpenMP clause operands, as - well as updating some tests that were impacted by changes to the - ordering or assembly format of some clauses in MLIR.
- -commit 4a746e50b14f252b40e03860562636dd1ded8044 -Author: Kareem Ergawy -Date: Fri Jun 28 05:20:59 2024 +0200 - - [NFC][flang] Remove unused `converter` parameter. (#96854) - - Removes an unused parameter in 2 utils that generate bounds ops. - -commit 6a66b8224d8cbdb6156b9a12d9339fee71898941 -Author: jeanPerier -Date: Thu Jun 27 19:22:48 2024 +0200 - - Revert "[flang] add extra component information in fir.type_info" (#96937) - - Reverts llvm/llvm-project#96746 - Breaking shared library builds: - https://lab.llvm.org/buildbot/#/builders/89/builds/931 - -commit 1448ed2000ff0be17025dab0aad7412d054425eb -Author: jeanPerier -Date: Thu Jun 27 18:59:03 2024 +0200 - - [flang] add extra component information in fir.type_info (#96746) - - fir.type does not contain all Fortran level information about - components. For instance, component lower bounds and default initial - value are lost. For correctness purpose, this does not matter because - this information is "applied" in lowering (e.g., when addressing the - components, the lower bounds are reflected in the hlfir.designate). - - However, this "loss" of information will prevent the generation of - correct debug info for the type (needs to know about lower bounds). The - initial value could help building some optimization pass to get rid of - initialization runtime calls. - - This patch adds lower bound and initial value information into - fir.type_info via a new fir.dt_component operation. This operation is - generated only for component that needs it, which helps keeping the IR - small for "boring" types. - - In general, adding Fortran level info in fir.type_info will allow - delaying the generation of "type descriptors" globals that are very - verbose in FIR and make it hard to work with FIR dumps from applications - with many derived types.
- -commit b4ab52c8e71e819c13606de3500043eaa701e1ea -Author: harishch4 -Date: Thu Jun 27 11:58:12 2024 +0530 - - [Flang][OpenMP] Lowering Order clause to MLIR (#96730) - -commit 8dd9494056d6797144dfabbbfb6d478c95375019 -Author: Tarun Prabhu -Date: Tue Jun 25 13:25:39 2024 -0600 - - Revert "[flang] Add basic -mtune support" (#96678) - - Reverts llvm/llvm-project#95043 - -commit aec735cf476c3975b026aa79fa40dda06a27fac3 -Author: agozillon -Date: Tue Jun 25 20:54:04 2024 +0200 - - [Flang][OpenMP][MLIR] Fix common block mapping for regular and declare target link (#91829) - - This PR attempts to fix common block mapping for regular mapping of - these types as well as when they have been marked as "declare target - link". This PR should allow correct mapping of both the members of a - common block and the full common block via its block symbol. - - The main changes were some adjustments to the Fortran OpenMP lowering to - HLFIR/FIR, the lowering of the LLVM+OpenMP dialect to LLVM-IR and - adjustments to the way the we handle target kernel map argument - rebinding inside of the OMPIRBuilder. - - For the Fortran OpenMP lowering were two changes, one to prevent the - implicit capture of common block members when the common block symbol - itself has been marked and the other creates intermediate member access - inside of the target region to be used in-place of those external to the - target region, this prevents external usages breaking the - IsolatedFromAbove pact. - - In the latter case, there was an adjustment to the size calculation for - types to better handle cases where we pass an array as the type of a map - (as opposed to the bounds and the type of the element), which occurs in - the case of common blocks. 
There is also some adjustment to how - handleDeclareTargetMapVar handles renaming of declare target symbols in - the module to the reference pointer, now it will only apply to those - within the kernel that is currently being generated and we also perform - a modification to replace constants with instructions as necessary as we - cannot replace these with our reference pointer (non-constant and - constants do not mix nicely). - - In the case of the OpenMPIRBuilder some changes were made to defer - global symbol rebinding to kernel arguments until all other arguments - have been rebound. This makes sure we do not replace uses that may refer - to the global (e.g. a GEP) but are themselves actually a separate - argument that needs bound. - - Currently "declare target to" still needs some work, but this may be the - case for all types in conjunction with "declare target to" at the - moment. - -commit a790279bf2a8be2f9c42bf80f55a63933e398d0e -Author: Alexis Perry-Holby -Date: Tue Jun 25 11:39:35 2024 -0600 - - [flang] Add basic -mtune support (#95043) - - This PR adds -mtune as a valid flang flag and passes the information - through to LLVM IR as an attribute on all functions. No specific - architecture optimizations are added at this time. - -commit 952bdaaf79c1e5d7364160b21de0cd1295cdfbd8 -Author: Leandro Lupori -Date: Tue Jun 25 09:25:41 2024 -0300 - - [flang][OpenMP] Fix copyprivate allocatable/pointer lowering (#95975) - - The lowering of copyprivate clauses with allocatable or pointer - variables was incorrect. This happened because the values passed to - copyVar() are always wrapped in SymbolBox::Intrinsic, which - resulted in allocatable/pointer variables being handled as regular - ones. - - This is fixed by providing to copyVar() the attributes of the - variables being copied, to make it possible to detect and handle - allocatable/pointer variables correctly. 
- - Fixes #95801 - -commit 8e8dccdecd4a5302fcfad33b4ee1282ae808b106 -Author: Valentin Clement (バレンタイン クレメン) -Date: Wed Jun 19 13:35:02 2024 -0700 - - [flang][cuda] Do not consider PINNED as device attribute (#95988) - - PINNED is a CUDA data attribute meant for the host variables. Do not - consider it when computing the number of device variables in assignment - for the cuda data transfer. - -commit 506b4cdae0929ff4bc7174cb580b5e55b8a74a0b -Author: David Truby -Date: Tue Jun 18 14:25:56 2024 +0100 - - [flang] Change vector always errors to warnings (#95908) - -commit 77d8cfb3c50e3341d65af1f9e442004bbd77af9b -Author: Alexander Shaposhnikov <6532716+alexander-shaposhnikov@users.noreply.github.com> -Date: Mon Jun 17 12:59:04 2024 -0700 - - [Flang] Switch to common::visit more call sites (#90018) - - Switch to common::visit more call sites. - - Test plan: ninja check-all - -commit 85f4593e856e5034c5de1e6bbea13fb59e1995f5 -Author: khaki3 <47756807+khaki3@users.noreply.github.com> -Date: Mon Jun 17 09:21:30 2024 -0700 - - [flang] Add a REDUCE clause to each nested loop (#95555) - - For DO CONCURRENT REDUCE, every nested loop should have a REDUCE clause - so that we can lower reduction without analysis. - -commit c6b6e18c4d25305ab98b6eab752de99ea4e15344 -Author: David Truby -Date: Fri Jun 14 14:10:41 2024 +0100 - - [flang] Implement !DIR$ VECTOR ALWAYS (#93830) - - This patch implements support for the VECTOR ALWAYS directive, which - forces - vectorization to occurr when possible regardless of a decision by the - cost - model. This is done by adding an attribute to the branch into the loop - in LLVM - to indicate that the loop should always be vectorized. - - This patch only implements this directive on plan structured do loops - without labels. Support for unstructured loops and array - expressions is planned for future patches. 
- -commit 7ffeaf0e187b41994f63ae82e73e123b942cd16b -Author: harishch4 -Date: Fri Jun 14 09:37:38 2024 +0530 - - [MLIR][Flang][OpenMP] Implement lowering simd aligned to MLIR (#95198) - - Rebased @DominikAdamski patch: https://reviews.llvm.org/D142722 - - --------- - - Co-authored-by: Dominik Adamski - Co-authored-by: Tom Eccles - -commit 7665d3d90da7f32e56cb57eb192dc8f189730686 -Author: Iman Hosseini -Date: Wed Jun 12 19:18:41 2024 +0100 - - [flang] Add reductions for CUF Kernels: Lowering (#95184) - - * Add reductionOperands and reductionAttrs to cuf's KernelOp. - * Parsing is already working and the tree has the info: here I make the - Bridge emit the updated KernelOp with reduction information added. - * Check |reductionAttrs| = |reductionOperands| in verifier - * Add a test - @clementval @vzakhari - - --------- - - Co-authored-by: Iman Hosseini - Co-authored-by: Valentin Clement (バレンタイン クレメン) - -commit 87374a8cffb6b6f589e8810a4d8502623e9d0268 -Author: vdonaldson <37090318+vdonaldson@users.noreply.github.com> -Date: Wed Jun 12 09:35:14 2024 -0400 - - [flang] Add support for lowering directives at the CONTAINS level (#95123) - - There is currently support for lowering directives that appear outside - of a module or procedure, or inside the body of a module or procedure. 
- Extend this to support directives at the CONTAINS level of a module or - procedure, such as directives 3, 5, 7 9, and 10 in: - - !dir$ some directive 1 - module m - !dir$ some directive 2 - contains - !dir$ some directive 3 - subroutine p - !dir$ some directive 4 - contains - !dir$ some directive 5 - subroutine s1 - !dir$ some directive 6 - end subroutine s1 - !dir$ some directive 7 - subroutine s2 - !dir$ some directive 8 - end subroutine s2 - !dir$ some directive 9 - end subroutine p - !dir$ some directive 10 - end module m - !dir$ some directive 11 - - This is done by looking for CONTAINS statements at the module or - procedure level, while ignoring CONTAINS statements at the derived type - level. - -commit fc1c34bbcb7811ebdbb623c5a4473c4f186c434d -Author: Sergio Afonso -Date: Wed Jun 12 12:34:23 2024 +0100 - - [Flang][OpenMP][Lower] Add lowering support of OpenMP distribute to MLIR (#67798) - - This patch adds support for lowering the OpenMP DISTRIBUTE directive - from PFT to MLIR. It only supports standalone DISTRIBUTE, support for - composite constructs will come in follow-up PRs. - -commit e7d569a0faa833623af59d4eab5d6277ce031d9e -Author: Valentin Clement -Date: Mon Jun 10 08:50:08 2024 -0700 - - [flang] Fix copy creation in #94718 - -commit f11e08fb26642fddebdefca5bec933fe39e4bd03 -Author: khaki3 <47756807+khaki3@users.noreply.github.com> -Date: Mon Jun 10 08:41:05 2024 -0700 - - [flang] Generate fir.do_loop reduce from DO CONCURRENT REDUCE clause (#94718) - - Derived from #92480. This PR updates the lowering process of DO - CONCURRENT to support F'2023 REDUCE clause. The structure - `IncrementLoopInfo` is extended to have both reduction operations and - symbols in `reduceSymList`. The function `getConcurrentControl` - constructs `reduceSymList` for the innermost loop. Finally, - `genFIRIncrementLoopBegin` builds `fir.do_loop` with reduction operands. 
- -commit 1539da4601448711fcfa622e26e596973d58c670 -Author: Kareem Ergawy -Date: Fri Jun 7 18:08:25 2024 +0200 - - [flang][OpenMP] Add `--openmp-enable-delayed-privatization-staging` flag (#94749) - -commit 913a8244fe8687df1f27b61c87aa23cf4fcbe84e -Author: Kareem Ergawy -Date: Fri Jun 7 14:44:01 2024 +0200 - - [flang][OpenMP] Lower `target .. private(..)` to `omp.private` ops (#94195) - - Extends delayed privatization support to `taraget .. private(..)`. With - this PR, `private` is support for `target` **only** is delayed - privatization mode. - -commit 8b18f2fe066b9f895185f1d94c6cf34901590164 -Author: Krzysztof Parzyszek -Date: Wed Jun 5 13:38:28 2024 -0500 - - [flang][OpenMP] Add `sym()` member function to omp::Object (#94493) - - The object identity requires more than just `Symbol`. Don't use `id()` - to get the Symbol associated with the object, becase the return value - will need to change. Instead use `sym()` which is added for that reason. - -commit b9549261e218cee2ad1305fb7272b831799b7bfe -Author: Sergio Afonso -Date: Wed Jun 5 14:43:58 2024 +0100 - - [Flang][OpenMP] Add -fopenmp-force-usm option to flang (#94359) - - This patch enables the `-fopenmp-force-usm` option to be passed to the - flang driver, which forwards it to the compiler frontend. This flag, - when set, results in the introduction of the `unified_shared_memory` bit - to the `omp.requires` attribute of the top-level module operation. - - This is later combined with any other target device-related REQUIRES - clauses that may have been explicitly set in the compilation unit. - -commit c7593344f48e64af29fd9512852f24f9ebe5a4c6 -Author: Peter Klausler <35819229+klausler@users.noreply.github.com> -Date: Mon Jun 3 12:58:39 2024 -0700 - - [flang] Better error message for RANK(NULL()) (#93577) - - We currently complain that the argument may not be a procedure, which is - confusing. Distinguish the NULL() case from other error cases (which are - indeed procedures). 
And clean up the utility predicates used for these - tests -- the current IsProcedure() is really just a test for a procedure - designator. - -commit d1aa9bac3c8ecc30fcc5d4d80a1f70c729aec909 -Author: jeanPerier -Date: Mon Jun 3 17:20:07 2024 +0200 - - [flang] lower select rank (#93967) - - Lower select rank according to [assumed-rank lowering design - doc](https://github.com/llvm/llvm-project/blob/main/flang/docs/AssumedRank.md). - - The construct is lowered using fir.box_rank and fir.select_case - operation and, for the non pointer/allocatable case, a - fir.is_assumed_size + conditional branch before the select_case to deal - with the assumed-size case. - - The way the CFG logic is generated, apart from the extra conditional - branch for assumed-size, is similar to what is done for SELECT CASE - lowering, hence the sharing of the construct level visitor. - For the CFG parts. The main difference is that we need to keep track of - the selector to cook it and map it inside the cases (hence the new - members of the ConstructContext). - - The only TODOs left are to deal with the RANK(*) case for polymorphic - entities and PDTs. I will do the polymorphic case in a distinct patch, - this patch has enough content. - - Fortran::evaluate::IsSimplyContiguous change is needed to avoid generating - copy-in/copy-out runtime calls when passing the RANK(*) associating - entity to some implicit interface. - -commit 6af4118f1557eb7ac07147607bd23e90c5bf2b35 -Author: Kareem Ergawy -Date: Mon May 27 14:26:52 2024 +0200 - - Reapply #91116 with fix (#93160) - - This PR contains 2 commits: - 1. A commit to reapply changes introduced #91116 (was reverted earlier - due to test suite failures) - 2. A commit containing a possible solution for the issue causing the - test suite failures. In particular, it introduces a simple symbol - visitor class to keep track of the current active OMP construct and - marking this active construct as the scope defining the symbol being - visisted. 
- -commit 1a2a0c0dc9aab6f440033f36ff2323685080f46a -Author: Anchu Rajendran S -Date: Thu May 23 13:46:35 2024 +0530 - - Fixing the location attribute added to mapInfoOp (#90764) - - Named location attribute added to `tgt_offload_entry` shall be used by - runtime calls like `ompx_dump_mapping_tables` to print the information - of variables that are mapped to the device. `ompx_dump_mapping_tables` - was printing the wrong location information and this change fixes it. - - A sample execution of example before the change: - ``` - omptarget device 0 info: OpenMP Host-Device pointer mappings after block at libomptarget:0:0: - - omptarget device 0 info: Host Ptr Target Ptr Size (B) DynRefCount HoldRefCount Declaration - - omptarget device 0 info: 0x0000000000206df0 0x00007f02cdc00000 20000000 1 0 at unknown:18:35 - ``` - - The change replaces unknown to the mapped symbol and location to the - declaration location. - -commit 0bc710f7c19910817ccff254c43496602635bbc9 -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue May 21 12:42:30 2024 -0700 - - [flang][cuda] Accept constant as src for cuf.data_tranfer (#92951) - - Assignment of a constant (host) to a device variable is a special case - that can be further lowered to `cudaMemset` or similar functions. This - patch update the lowering to avoid the creation of a temporary when we - assign a constant to a device variable. - -commit 1fc3ce1cdb8390ed64feea939a9555d3642439ea -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue May 21 11:23:55 2024 -0700 - - [flang][cuda] Enable data transfer for descriptors (#92804) - - Remove the TODO when data transfer is done with descriptor variables. 
- -commit c1b5b7c19b76f8d153f7ae9350d217b74888ed93 -Author: Krzysztof Parzyszek -Date: Tue May 21 08:19:54 2024 -0500 - - [flang][Lower] Emit exiting branches from within constructs (#92455) - - When lowering IfConstruct, CaseConstruct, and SelectTypeConstruct, emit - branches that exit the construct in each block that is still - unterminated after the FIR has been generated in it. - - The same thing may be needed for SelectRankConstruct, once it's - supported. - - This eliminates the need for inserting branches in `genFIR(Evaluation)`. - - Follow-up to PR https://github.com/llvm/llvm-project/pull/91614. - -commit 6658e1a3fdfebfc9d1805029ca0e4de643634927 -Author: Anchu Rajendran S -Date: Mon May 20 21:32:41 2024 -0700 - - Adding parsing and semantic check support for omp masked (#91432) - - omp masked directive in OpenMP 5.2 allows to specify code regions which - are expected to be executed by thread ids specified by the programmer. - Filter clause of the directive allows to specify the thread id. This - change adds the parsing support for the directive - -commit 85e1124049cd8aa1e58c101e082100ba74df7e42 -Author: Muhammad Omair Javaid -Date: Tue May 21 06:50:43 2024 +0500 - - Revert "[flang][OpenMP] Try to unify induction var privatization for OMP regions. (#91116)" - - This reverts commit 2a97b507dc643b7ee3bc651b3f21b754cfba433c. - - It has broken LLVM testsuite on various bots - https://lab.llvm.org/buildbot/#/builders/184/builds/12760 - https://lab.llvm.org/buildbot/#/builders/197/builds/14376 - https://lab.llvm.org/buildbot/#/builders/179/builds/10176 - -commit 2a97b507dc643b7ee3bc651b3f21b754cfba433c -Author: Kareem Ergawy -Date: Sat May 18 08:39:58 2024 +0200 - - [flang][OpenMP] Try to unify induction var privatization for OMP regions. 
(#91116) - -commit 45daa4fdc68f5faa5bd5c33da052d2415cd88540 -Author: Valentin Clement (バレンタイン クレメン) -Date: Fri May 17 09:37:53 2024 -0700 - - [flang][cuda] Move CUDA Fortran operations to a CUF dialect (#92317) - - The number of operations dedicated to CUF grew and where all still in - FIR. In order to have a better organization, the CUF operations, - attributes and code is moved into their specific dialect and files. CUF - dialect is tightly coupled with HLFIR/FIR and their types. - - The CUF attributes are bundled into their own library since some - HLFIR/FIR operations depend on them and the CUF dialect depends on the - FIR types. Without having the attributes into a separate library there - would be a dependency cycle. - -commit 74a87548e5b62881108e6cd1fd63b45580fc3097 -Author: Tom Eccles -Date: Thu May 16 15:27:59 2024 +0100 - - [flang][MLIR][OpenMP] make reduction by-ref toggled per variable (#92244) - - Fixes #88935 - - Toggling reduction by-ref broke when multiple reduction clauses were - used. Decisions made for the by-ref status for later clauses could then - invalidate decisions for earlier clauses. For example, - - ``` - reduction(+:scalar,scalar2) reduction(+:array) - ``` - - The first clause would choose by value reduction and generate by-value - reduction regions, but then after this the second clause would force - by-ref to support the array argument. But by the time the second clause - is processed, the first clause has already had the wrong kind of - reduction regions generated. - - This is solved by toggling whether a variable should be reduced by - reference per variable. In the above example, this allows only `array` - to be reduced by ref. 
- -commit 7a66e4209b0b4cc0dc871a54c4f07a4b0054b5f7 -Author: Krzysztof Parzyszek -Date: Thu May 16 07:49:01 2024 -0500 - - [flang][OpenMP] Remove unnecessary `Fortran::` qualification, NFC (#92298) - - The `Fortran::` namespace is redundant for all parts of the code in this - PR, except for names of functions in their definitions. - -commit 526553b25131a69d9d6426e17c7b69c2ba27144f -Author: Yusuke MINATO -Date: Thu May 16 13:16:07 2024 +0900 - - [flang] Add nsw flag to do-variable increment with a new option (#91579) - - This patch adds nsw flag to the increment of do-variables when a new - option is enabled. - NOTE 11.10 in the Fortran 2018 standard says they never overflow. - - See also the discussion in #74709 and the following discourse post. - https://discourse.llvm.org/t/rfc-add-nsw-flags-to-arithmetic-integer-operations-using-the-option-fno-wrapv/77584/5 - -commit 4525f442fadb7cc44cc2eaede2c8ac6ba15bdf78 -Author: Krzysztof Parzyszek -Date: Wed May 15 12:01:16 2024 -0500 - - [flang][OpenMP] Don't pass clauses to op-generating functions anymore (#90108) - - Remove parameter `const List &clauses` from functions that take - construct queue. The clauses should now be accessed from the construct - queue. - -commit 415616daa0bdf6c0065c4c1967f1c4050e6ea836 -Author: Krzysztof Parzyszek -Date: Tue May 14 07:33:07 2024 -0500 - - [flang][OpenMP] Lower standalone ops via OMP dispatch, NFC (#92045) - - This moves lowering of standalone OpenMP ops into the dispatch function. - Follow-up to PR90098. - -commit c7c5666aac543a49b485a133f4a94865e2613a43 -Author: jeanPerier -Date: Tue May 14 13:34:46 2024 +0200 - - [flang] Do not hoist all scalar sub-expressions from WHERE constructs (#91395) - - The HLFIR pass lowering WHERE (hlfir.where op) was too aggressive in its - hoisting of scalar sub-expressions from LHS/RHS/MASKS outside of the - loops generated for the WHERE construct. 
- This violated F'2023 10.2.3.2 point 10 that stipulated that elemental - operations must be evaluated only for elements corresponding to true - values, because scalar operations are still elemental, and hoisting them - is invalid if they could have side effects (e.g, division by zero) and - if the MASK is always false (i.e., the loop body is never evaluated). - - The difficulty is that 10.2.3.2 point 9 mandates that nonelemental - function must be evaluated before the loops. So it is not possible to - simply stop hoisting non hlfir.elemental operations. - Marking calls with an elemental/nonelemental attribute would not allow - the pass to be correct if inlining is run before and drops this - information, beside, extracting the argument tree that may have been - CSE-ed with the rest of the expression evaluation would be a bit - combursome. - - Instead, lower nonelemental calls into a new hlfir.exactly_once - operation that will allow retaining the information that the operations - contained inside its region must be hoisted. This allows inlining to - operate before if desired in order to improve alias analysis. - - The LowerHLFIROrderedAssignments pass is updated to only hoist the - operations contained inside hlfir.exactly_once bodies. - -commit 1066eb55477044a3a92f3a40471375194dfcdbc8 -Author: Kazu Hirata -Date: Mon May 13 09:33:43 2024 -0700 - - [flang] Fix a warning - - This patch fixes: - - flang/lib/Lower/OpenMP/OpenMP.cpp:2346:14: error: unused variable - 'origDirective' [-Werror,-Wunused-variable] - -commit be7c9e39572d876c16b6a8d7f4addaf9409071ff -Author: Krzysztof Parzyszek -Date: Mon May 13 08:09:24 2024 -0500 - - [flang][OpenMP] Decompose compound constructs, do recursive lowering (#90098) - - A compound construct with a list of clauses is broken up into individual - leaf/composite constructs. Each such construct has the list of clauses - that apply to it based on the OpenMP spec. - - Each lowering function (i.e. 
a function that generates MLIR ops) is now - responsible for generating its body as described below. - - Functions that receive AST nodes extract the construct, and the clauses - from the node. They then create a work queue consisting of individual - constructs, and invoke a common dispatch function to process (lower) the - queue. - - The dispatch function examines the current position in the queue, and - invokes the appropriate lowering function. Each lowering function - receives the queue as well, and once it needs to generate its body, it - either invokes the dispatch function on the rest of the queue (if any), - or processes nested evaluations if the work queue is at the end. - - Re-application of ca1bd5995f6ed934f9187305190a5abfac049173 with fixes for - compilation errors. - -commit 25a3ba33153e99c4614d404ba18b761d652e24de -Author: Krzysztof Parzyszek -Date: Mon May 13 08:42:06 2024 -0500 - - Revert "[flang][OpenMP] Decompose compound constructs, do recursive lowering (#90098)" - - It breaks some builds, e.g. - https://lab.llvm.org/buildbot/#/builders/268/builds/13909 - - This reverts commit ca1bd5995f6ed934f9187305190a5abfac049173. - -commit ca1bd5995f6ed934f9187305190a5abfac049173 -Author: Krzysztof Parzyszek -Date: Mon May 13 08:09:24 2024 -0500 - - [flang][OpenMP] Decompose compound constructs, do recursive lowering (#90098) - - A compound construct with a list of clauses is broken up into individual - leaf/composite constructs. Each such construct has the list of clauses - that apply to it based on the OpenMP spec. - - Each lowering function (i.e. a function that generates MLIR ops) is now - responsible for generating its body as described below. - - Functions that receive AST nodes extract the construct, and the clauses - from the node. They then create a work queue consisting of individual - constructs, and invoke a common dispatch function to process (lower) the - queue. 
- - The dispatch function examines the current position in the queue, and - invokes the appropriate lowering function. Each lowering function - receives the queue as well, and once it needs to generate its body, it - either invokes the dispatch function on the rest of the queue (if any), - or processes nested evaluations if the work queue is at the end. - -commit a427aa9346295fe7dd3be5955214d28c8be2ad4a -Author: Krzysztof Parzyszek -Date: Fri May 10 15:04:39 2024 -0500 - - [flang][Lower] Treat directives with nested evaluations as constructs (#91614) - - When generating block terminators in `genFIR(Evaluation)`, treat - `Directives` with nested evaluations the same way as `Constructs` to - determine the successor block. - - This fixes https://github.com/llvm/llvm-project/issues/91526 - -commit 435e850ba97ab567a14b6c84d2b27cadb771cb27 -Author: Andrew Gozillon -Date: Mon Feb 12 10:53:28 2024 -0600 - - [Flang][OpenMP][MLIR] Initial derived type member map support - - This patch is one in a series of four patches that seeks to refactor - slightly and extend the current record type map support that was - put in place for Fortran's descriptor types to handle explicit - member mapping for record types at a single level of depth. - - For example, the below case where two members of a Fortran - derived type are mapped explicitly: - - '''' - type :: scalar_and_array - real(4) :: real - integer(4) :: array(10) - integer(4) :: int - end type scalar_and_array - type(scalar_and_array) :: scalar_arr - - !$omp target map(tofrom: scalar_arr%int, scalar_arr%real) - '''' - - Current cases of derived type mapping left for future work are: - > explicit member mapping of nested members (e.g. 
two layers of - record types where we explicitly map a member from the internal - record type) - > Fortran's automagical mapping of all elements and nested elements - of a derived type - > explicit member mapping of a derived type and then constituient members - (redundant in Fortran due to former case but still legal as far as I am aware) - > explicit member mapping of a record type (may be handled reasonably, just - not fully tested in this iteration) - > explicit member mapping for Fortran allocatable types (a variation of nested - record types) - - This patch seeks to support this by extending the Flang-new OpenMP lowering to - support generation of this newly required information, creating the neccessary - parent <-to-> member map_info links, calculating the member indices and - setting if it's a partial map. - - The OMPDescriptorMapInfoGen pass has also been generalized into a map - finalization phase, now named OMPMapInfoFinalization. This pass was extended - to support the insertion of member maps into the BlockArg and MapOperands of - relevant map carrying operations. Similar to the method in which descriptor types - are expanded and constituient members inserted. - - Pull Request: https://github.com/llvm/llvm-project/pull/82853 - -commit 1710c8cf0f8def4984893e9dd646579de5528d95 -Author: Slava Zakharin -Date: Wed May 8 16:48:14 2024 -0700 - - [flang] Lowering changes for assigning dummy_scope to hlfir.declare. (#90989) - - The lowering produces fir.dummy_scope operation if the current - function has dummy arguments. Each hlfir.declare generated - for a dummy argument is then using the result of fir.dummy_scope - as its dummy_scope operand. This is only done for HLFIR. - - I was not able to find a reliable way to identify dummy symbols - in `genDeclareSymbol`, so I added a set of registered dummy symbols - that is alive during the variables instantiation for the current - function. 
The set is initialized during the mapping of the dummy - argument symbols to their MLIR values. It is reset right after - all variables are instantiated - this is done to avoid generating - hlfir.declare operations with dummy_scope for the clones of - the dummy symbols (e.g. this happens with OpenMP privatization). - - If this can be done in a cleaner way, please advise. - -commit f72454086af9d3f91a86e10dc1923849c5f670a8 -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue May 7 08:29:21 2024 -0700 - - [flang][cuda] Fix retrieval of nested evaluation in cuf kernel (#91298) - - `loopEval` was declared inside the for loop to iterate over the nested - loops so the same loop control was redeclared for each level of the loop - nest. Make sure we are iterating over all the loops by putting - `loopEval` declaration ouside of the for loop. - -commit 6542e5663d1e599d5ed7c961842ce2fcdc9f5090 -Author: Leandro Lupori -Date: Mon May 6 13:14:18 2024 -0300 - - [flang][OpenMP] Move privatizations out of sections (#88191) - - Besides duplicating code, privatizing variables in every section - causes problems when synchronization barriers are used. This - happens because each section is executed by a given thread, which - will cause the program to hang if not all running threads execute - the barrier operation. - - Fixes https://github.com/llvm/llvm-project/issues/72824 - -commit 24f5fc77d43f4ae2dc1cb0c0902c5e22cbadf09e -Author: Kareem Ergawy -Date: Sat May 4 21:20:17 2024 +0200 - - [flang][MLIR][OpenMP] Extend delayed privatization for arrays and characters (#85023) - -commit cda8270981b666c492933a9df1d984d0d0f8433f -Author: vdonaldson <37090318+vdonaldson@users.noreply.github.com> -Date: Fri May 3 09:11:10 2024 -0400 - - [flang] Source code location for IF statements and constructs (#90853) - - Make source code locations for IF statements and IF construct component - statements more accurate. Make similar changes to ASSOCIATE, BLOCK, and - SELECT TYPE construct component statements. 
- -commit 37f6ba4fb2db2c78cda7d0a69cd0a2eff2b924e3 -Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> -Date: Thu May 2 21:58:20 2024 -0700 - - [flang][OpenMP] Fix construct privatization in default clause (#72510) - - Current implementation of default clause privatization incorrectly fails - to privatize in presence of non-OpenMP constructs (i.e. nested - constructs with regions whose symbols need to be privatized in the scope - of the parent OpenMP construct). This patch fixes the same by - considering non-OpenMP constructs separately by collecting symbols of a - nested region if it is a non-OpenMP construct with a region, and - privatizing it in the scope of the parent OpenMP construct. - - Fixes https://github.com/llvm/llvm-project/issues/71914 and - https://github.com/llvm/llvm-project/issues/71915 - -commit 57d0d3b4d638d170035f55d79f0202f1042de345 -Author: Kiran Chandramohan -Date: Wed May 1 12:58:50 2024 +0100 - - [Flang][OpenMP] Handle more character allocatable cases in privatization (#90449) - - Fixes #84732, #81947, #81946 - - Note: This is a fix till we enable delayed privatization. - -commit ecec1311fe0521404a11d6f3b90253259c8c3518 -Author: David Truby -Date: Tue Apr 30 22:38:36 2024 +0100 - - [flang] Remove double pointer indirection for _QQEnvironmentDefaults (#90615) - - A double pointer was being passed to the call to FortranStart rather than just a pointer to the EnvironmentDefaults.list. This now passes `null` directly when there's no EnvironmentDefaults.list and passes the list directly when there is, removing the original global variable which was a pointer to a pointer containing null or the EnvironmentDefaults.list global. - - Fixes #90537 - -commit 33ccd037fcd2b4346065ebcdcbb5d8c1887c2639 -Author: Krzysztof Parzyszek -Date: Tue Apr 30 11:44:55 2024 -0500 - - [flang][OpenMP] Pass symTable to all genXYZ functions, NFC (#90090) - - This will unify the interface a bit more. 
- -commit f815d1f71f644a6cfd2c22bf7898a1034be235ad -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue Apr 30 08:27:28 2024 -0700 - - [flang][cuda] Fix iv store in cuf kernel (#90551) - - Store of the current induction value to the user IV was not placed - correctly in the body of the cuf kernel. - - @ImanHosseini - -commit 8d5386669ed63548daf1bee415596582d6d78d7d -Author: David Truby -Date: Mon Apr 29 14:16:25 2024 +0100 - - [flang] Generate main only when a Fortran program statement is present (#89938) - - This patch changes the behaviour for flang to only create and link to a - `main` entry point when the Fortran code has a program statement in it. - - This means that flang-new can be used to link even when the program is - a mixed C/Fortran code with `main` present in C and no entry point - present in Fortran. - - This also removes the `-fno-fortran-main` flag as this no longer has any - functionality. - -commit fac349a169976f822fb27f03e623fa0d28aec1f3 -Author: Christian Sigg -Date: Sun Apr 28 22:01:42 2024 +0200 - - Reapply "[mlir] Mark `isa/dyn_cast/cast/...` member functions depreca… (#90406) - - …ted. (#89998)" (#90250) - - This partially reverts commit 7aedd7dc754c74a49fe84ed2640e269c25414087. - - This change removes calls to the deprecated member functions. It does - not mark the functions deprecated yet and does not disable the - deprecation warning in TypeSwitch. This seems to cause problems with - MSVC. - -commit eb5907d06f2ffc3ed7fb8d4772bd3f8eab89c726 -Author: Valentin Clement (バレンタイン クレメン) -Date: Fri Apr 26 13:31:34 2024 -0700 - - [flang][cuda] Avoid to issue data transfer in device context (#90247) - - Data transfer should not be issued in device function. - -commit 7aedd7dc754c74a49fe84ed2640e269c25414087 -Author: dyung -Date: Fri Apr 26 12:09:13 2024 -0700 - - Revert "[mlir] Mark `isa/dyn_cast/cast/...` member functions deprecated. (#89998)" (#90250) - - This reverts commit 950b7ce0b88318f9099e9a7c9817d224ebdc6337. 
- - This change is causing build failures on a bot - https://lab.llvm.org/buildbot/#/builders/216/builds/38157 - -commit 950b7ce0b88318f9099e9a7c9817d224ebdc6337 -Author: Christian Sigg -Date: Fri Apr 26 16:28:30 2024 +0200 - - [mlir] Mark `isa/dyn_cast/cast/...` member functions deprecated. (#89998) - - See https://mlir.llvm.org/deprecation and - https://discourse.llvm.org/t/preferred-casting-style-going-forward. - -commit 09cdfd68a6cce69cd4c935b8c38ad391cea265ae -Author: Valentin Clement (バレンタイン クレメン) -Date: Thu Apr 25 08:50:52 2024 -0700 - - [flang][cuda] Avoid hlfir.declare verifier error when creating temps (#89984) - - When creating temporaries for implicit transfer, the newly create - hlfir.declare operation was missing some information like the shape and - the verifier was throwing an error. Fix it by making sure we have an - ExtendedValue when calling addSymbol to register the temp. - - ``` - error: loc("cuda-data-transfer.cuf":67:22): 'hlfir.declare' op of array entity - with a raw address base must have a shape operand that is a shape or shapeshift - ``` - - Thanks @jeanPerier for the advice! - - FYI @ImanHosseini - -commit 5cfd5d157cb5aca7ee348bd3e8b4fcd35a5762c8 -Author: Valentin Clement (バレンタイン クレメン) -Date: Thu Apr 25 08:50:34 2024 -0700 - - [flang][cuda] Do not generate data transfer within cuf kernel (#89973) - - CUDA data transfer with intrinsic assignment are not meant to be - generated in cuf kernel. This patch fix this issue. - - @ImanHosseini - -commit ca4dbc271842e8c9b5ed01bd66b687ab703896d0 -Author: Sergio Afonso -Date: Wed Apr 24 14:30:03 2024 +0100 - - [Flang][OpenMP][Lower] Update workshare-loop lowering (5/5) (#89215) - - This patch updates lowering from PFT to MLIR of workshare loops to - follow the loop wrapper approach. Unit tests impacted by this change are - also updated. - - As the last patch of the stack, this should compile and pass unit tests. 
- -commit cb2639196842630c0a1d5f91e26261f1c42b49fb -Author: Peter Klausler <35819229+klausler@users.noreply.github.com> -Date: Mon Apr 22 15:21:45 2024 -0700 - - [flang] Make proc characterization error conditional for generics (#89429) - - When the characteristics of a procedure depend on a procedure that - hasn't yet been defined, the compiler currently emits an unconditional - error message. This includes the case of a procedure whose - characteristics depend, perhaps indirectly, on itself. However, in the - case where the characteristics of a procedure are needed to resolve a - generic, we should not emit an error for a hitherto undefined procedure - -- either the call will resolve to another specific procedure, in which - case the error is spurious, or it won't, and then an error will issue - anyway. - - Fixes https://github.com/llvm/llvm-project/issues/88677. - -commit 9c9dea943706340f8a45dc74887bf9beddd67810 -Author: Krzysztof Parzyszek -Date: Mon Apr 22 13:04:20 2024 -0500 - - [flang][OpenMP] Concatenate begin and end clauses into single list (#89090) - - This will remove the distinction between begin clauses and end clauses, - and process all of them together. - -commit 9dbf3e2384e450c2b4f282b85b9ec47c65976194 -Author: Sergio Afonso -Date: Fri Apr 19 16:13:10 2024 +0100 - - [Flang][OpenMP] NFC: Simplify handling of insertion points (#89221) - - This patch replaces some `saveInsertionPoint`, `restoreInsertionPoint` - call pairs for an `InsertionGuard` instance where it makes sense within - Flang OpenMP lowering to make further modifications less error-prone. - -commit 992413de99588a60920f934de07d703efb432ade -Author: Krzysztof Parzyszek -Date: Thu Apr 18 12:02:04 2024 -0500 - - [flang][OpenMP] Move clause/object conversion to happen early, in genOMP (#87086) - - This removes the last use of genOmpObjectList2, which has now been - removed. 
- - --------- - - Co-authored-by: Sergio Afonso - -commit c8dca5bc0733e2fba81008fc33fcad1f45ba666a -Author: Sergio Afonso -Date: Wed Apr 17 12:17:50 2024 +0100 - - [Flang][OpenMP][Lower] Refactor lowering of compound constructs (#87070) - - This patch simplifies the lowering from PFT to MLIR of OpenMP compound - constructs (i.e. combined and composite). - - The new approach consists of iteratively processing the outermost leaf - construct of the given combined construct until it cannot be split - further. Both leaf constructs and composite ones have `gen...()` - functions that are called when appropriate. - - This approach enables treating a leaf construct the same way regardless - of if it appeared as part of a combined construct, and it also enables - the lowering of composite constructs as a single unit. - - Previous corner cases are now handled in a more straightforward way and - comments pointing to the relevant spec section are added. Directive sets - are also completed with missing LOOP related constructs. - -commit 3eb0ba34b0a2a29c2f34ead2b84fdf9b62cb29c1 -Author: Sergio Afonso -Date: Wed Apr 17 11:28:30 2024 +0100 - - [MLIR][Flang][OpenMP] Make omp.simdloop into a loop wrapper (#87365) - - This patch updates the definition of `omp.simdloop` to enforce the - restrictions of a wrapper operation. It has been renamed to `omp.simd`, - to better reflect the naming used in the spec. All uses of "simdloop" in - function names have been updated accordingly. - - Some changes to Flang lowering and OpenMP to LLVM IR translation are - introduced to prevent the introduction of compilation/test failures. The - eventual long term solution might be different. 
- -commit 4dd5180a2d43b088d7637c30c2654f3c01c46987 -Author: Sergio Afonso -Date: Tue Apr 16 11:08:25 2024 +0100 - - [Flang][OpenMP][Lower] Split MLIR codegen for clauses and constructs (#86963) - - This patch performs several cleanups with the main purpose of - normalizing the code patterns used to trigger codegen for MLIR OpenMP - operations and making the processing of clauses and constructs - independent. The following changes are made: - - - Clean up unused `directive` argument to - `ClauseProcessor::processMap()`. - - Move general helper functions in OpenMP.cpp to the appropriate section - of the file. - - Create `genClauses()` functions containing the clause - processing code specific for the associated OpenMP construct. - - Update `genOp()` functions to call the corresponding - `genClauses()` function. - - Sort calls to `ClauseProcessor::process()` alphabetically, - to avoid inadvertently relying on some arbitrary order. Update some - tests that broke due to the order change. - - Normalize `genOMP()` functions so they all delegate the generation of - MLIR to `genOp()` functions following the same pattern. - - Only process `nowait` clause on `TARGET` constructs if not compiling - for the target device. - - A later patch can move the calls to `genClauses()` out of - `genOp()` functions and passing completed clause structures - instead, in preparation to supporting composite constructs. That will - make it possible to reuse clause processing for a given leaf construct - when appearing alone or in a combined or composite construct, while - controlling where the associated code is produced. - -commit 76782e28869abf93716f72f195d55c28eaf263ed -Author: Kiran Chandramohan -Date: Tue Apr 16 10:29:26 2024 +0100 - - [Flang][OpenMP] NFC: Remove old reduction lowering code (#88798) - - The old code was replaced by - https://github.com/llvm/llvm-project/pull/80019. 
- -commit 78eac466095c205988ef1e2380033d042a169a3b -Author: Sergio Afonso -Date: Fri Apr 12 12:42:41 2024 +0100 - - [Flang][OpenMP][Lower] Use clause operand structures (#86802) - - This patch updates Flang lowering to use the new set of OpenMP clause - operand structures and their groupings into directive-specific sets of - clause operands. - - It simplifies the passing of information from the clause processor and - the creation of operations. - - The `DataSharingProcessor` is slightly modified to not hold delayed - privatization state. Instead, optional arguments are added to - `processStep1` which are only passed when delayed privatization is used. - This enables using the clause operand structure for `private` and - removes the need for the ad-hoc `DelayedPrivatizationInfo` structure. - - The processing of the `schedule` clause is updated to process the - `chunk` modifier rather than requiring two separate calls to the - `ClauseProcessor`. - - Lowering of a block-associated `ordered` construct is updated to emit a - TODO error if the `simd` clause is specified, since it is not currently - supported by the `ClauseProcessor` or later compilation stages. - - Removed processing of `schedule` from `omp.simdloop`, as it doesn't - apply to `simd` constructs. - -commit 298ea9bfd50ca41c77e45065700df06adb6264ae -Author: Raghu Maddhipatla <7686592+raghavendhra@users.noreply.github.com> -Date: Thu Apr 11 10:26:54 2024 -0500 - - [Flang] [OpenMP] [MLIR] [Lowering] Add lowering support for IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses on OMP TARGET directive. (#88206) - - Added lowering support for IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses for - OMP TARGET directive and added related tests for these changes. - - IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses apply to OMP TARGET directive - OpenMP spec states - - The **is_device_ptr** clause indicates that its list items are device - pointers. 
- - The **has_device_addr** clause indicates that its list items already - have device addresses and therefore they may be directly accessed from a - target device. - - Whereas USE_DEVICE_PTR and USE_DEVICE_ADDR clauses apply to OMP TARGET - DATA directive and OpenMP spec for them states - - Each list item in the **use_device_ptr** clause results in a new list - item that is a device pointer that refers to a device address - - Each list item in a **use_device_addr** clause that is present in the - device data environment is treated as if it is implicitly mapped by a - map clause on the construct with a map-type of alloc - - Fixed build error caused by Squash merge which needs rebase - -commit eec41d2f8d81b546d7b97648cca6b2d656104bd3 -Author: Raghu Maddhipatla <7686592+raghavendhra@users.noreply.github.com> -Date: Tue Apr 9 16:18:56 2024 -0500 - - Revert "[Flang] [OpenMP] [Semantics] [MLIR] [Lowering] Add lowering support for IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses on OMP TARGET directive." (#88198) - - Reverts llvm/llvm-project#74187 - -commit 9d9560facb5597e0232ab15716a7915a33d4f0a6 -Author: Raghu Maddhipatla <7686592+raghavendhra@users.noreply.github.com> -Date: Tue Apr 9 14:59:20 2024 -0500 - - [Flang] [OpenMP] [Semantics] [MLIR] [Lowering] Add lowering support for IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses on OMP TARGET directive. (#74187) - - Added lowering support for IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses for - OMP TARGET directive and added related tests for these changes. 
- - IS_DEVICE_PTR and HAS_DEVICE_ADDR clauses apply to OMP TARGET directive - OpenMP spec states - - `The **is_device_ptr** clause indicates that its list items are device - pointers.` - - `The **has_device_addr** clause indicates that its list items already - have device addresses and therefore they may be directly accessed from a - target device.` - - Whereas USE_DEVICE_PTR and USE_DEVICE_ADDR clauses apply to OMP TARGET - DATA directive and OpenMP spec for them states - - `Each list item in the **use_device_ptr** clause results in a new list - item that is a device pointer that refers to a device address` - - `Each list item in a **use_device_addr** clause that is present in the - device data environment is treated as if it is implicitly mapped by a - map clause on the construct with a map-type of alloc` - -commit 3f2f700633bbcc0cb5ada17f5736b43f9c1e426e -Author: Jie Fu -Date: Sat Apr 6 07:31:53 2024 +0800 - - [flang] Fix -Wunused-but-set-variable in Bridge.cpp (NFC) - - llvm-project/flang/lib/Lower/Bridge.cpp:3775:14: - error: variable 'nbDeviceResidentObject' set but not used [-Werror,-Wunused-but-set-variable] - unsigned nbDeviceResidentObject = 0; - ^ - 1 error generated. - -commit 953aa102a90099ae655eaa4645dd8d15c95ea86a -Author: Valentin Clement (バレンタイン クレメン) -Date: Fri Apr 5 09:11:37 2024 -0700 - - [flang][cuda] Lower device to host and device to device transfer (#87387) - - Add more support for CUDA data transfer in assignment. This patch adds - device to device and device to host support. If device symbols are - present on the rhs, some implicit data transfer are initiated. A - temporary is created and the data are transferred to the host. The - expression is evaluated on the host and the assignment is done. 
- -commit a4798bb0b67533b37d6b34fd5292714aac3b17d9 -Author: jeanPerier -Date: Tue Apr 2 14:29:29 2024 +0200 - - [flang][NFC] use mlir::SymbolTable in lowering (#86673) - - Whenever lowering is checking if a function or global already exists in - the mlir::Module, it was doing module->lookup. - - On big programs (~5000 globals and functions), this causes important - slowdowns because these lookups are linear. Use mlir::SymbolTable to - speed-up these lookups. The SymbolTable has to be created from the - ModuleOp and maintained in sync. It is therefore placed in the - converter, and FirOPBuilders can take a pointer to it to speed-up the - lookups. - - This patch does not bring mlir::SymbolTable to FIR/HLFIR passes, but - some passes creating a lot of runtime calls could benefit from it too. - More analysis will be needed. - - As an example of the speed-ups, this patch speeds-up compilation of - Whizard compare_amplitude_UFO.F90 from 5 mins to 2 mins on my machine - (there is still room for speed-ups). - -commit 79199753fd6c39aac881b9556614c5db2775dc85 -Author: Krzysztof Parzyszek -Date: Thu Mar 28 07:46:01 2024 -0500 - - [flang][OpenMP] Make several function local to OpenMP.cpp, NFC (#86726) - - There were several functions, mostly reduction-related, that were only - called from OpenMP.cpp. Remove them from OpenMP.h, and make them local - in OpenMP.cpp: - - genOpenMPReduction - - findReductionChain - - getConvertFromReductionOp - - updateReduction - - removeStoreOp - - Also, move the function bodies out of the "public" section. - -commit 4d177435bae03551245ffdc4dfcee5345323121d -Author: Krzysztof Parzyszek -Date: Wed Mar 27 11:37:09 2024 -0500 - - [flang][OpenMP] Rename makeList overloads to make{Objects,Clauses}, NFC (#86725) - - Reserve `makeList` to create a list given an explicit converter - function. 
- -commit 148a55795de7ac465a8e494d5d382e100da643f6 -Author: Krzysztof Parzyszek -Date: Tue Mar 26 13:54:26 2024 -0500 - - [flang][OpenMP] Make OpenMP clause representation language-agnostic (#86289) - - The clause templates defined in ClauseT.h were originally based on - flang's parse tree nodes. Since those representations are going to be - reused for clang (together with the clause splitting code), it makes - sense to separate them from flang, and instead have them based on the - actual OpenMP spec (v5.2). - - The member names in the templates follow the naming presented in the - spec, and the representation (e.g. members) is derived from the clause - definitions as described in the spec. - - Since the representations of some clauses has changed (while preserving - the information), the current code using the clauses (especially the - code converting parser::OmpClause to omp::Clause) needs to be adjusted. - - This patch does not make any functional changes. - -commit 4998587e6f5f66d464ac22ad4c11fe9afd2d56ab -Author: Daniel Chen -Date: Tue Mar 26 11:29:24 2024 -0400 - - [Flang] Support for passing procedure pointer, reference to a function that returns a procedure pointer to structure constructor. (#86533) - - This PR fixes `not yet implemented: procedure pointer component in - structure constructor` as shown in the following test case. 
- - ``` - MODULE M - TYPE :: DT - PROCEDURE(Fun), POINTER, NOPASS :: pp1 - END TYPE - - CONTAINS - - INTEGER FUNCTION Fun(Arg) - INTEGER :: Arg - Fun = Arg - END FUNCTION - - END MODULE - - PROGRAM MAIN - USE M - IMPLICIT NONE - TYPE (DT) :: v2 - PROCEDURE(FUN), POINTER :: pp2 - v2 = DT(pp2) - v2 = DT(bar()) - CONTAINS - FUNCTION BAR() RESULT(res) - PROCEDURE(FUN), POINTER :: res - END - END - ``` - -commit 4e6745cc4db309c0e1b5e41d4598f67763f4c096 -Author: Valentin Clement (バレンタイン クレメン) -Date: Mon Mar 25 11:53:39 2024 -0700 - - [flang][cuda] Lower simple host to device data transfer (#85960) - - In CUDA Fortran data transfer can be done via assignment statements - between host and device variables. - - This patch introduces a `fir.cuda_data_transfer` operation that - materialized the data transfer between two memory references. - - Simple transfer not involving descriptors from host to device are also - lowered in this patch. When the rhs is an expression that required an - evaluation, a temporary is created. The evaluation is done on the host - and then the transfer is initiated. - - Implicit transfer when device symbol are present on the rhs is not part - of this patch. Transfer from device to host is not part of this patch. - -commit 564035edb0e67a177fd911fc779cd64300a9b5ef -Author: Krzysztof Parzyszek -Date: Mon Mar 25 09:54:04 2024 -0500 - - [flang][OpenMP] Organize `genOMP` functions in OpenMP.cpp, NFC (#86309) - - Put all of the genOMP functions together, organize them in two groups: - for declarative constructs and for other (executable) constructs. - - Replace visit functions for OpenMPDeclarativeConstruct and - OpenMPConstruct from listing individual visitors for each variant - alternative to using a single generic visitor. Essentially, going from - ``` - std::visit( - [](foo x) { genOMP(foo); } - [](bar x) { TODO } - [](baz x) { genOMP(baz); } - ) - ``` - to - ``` - void genOMP(bar x) { // Separate visitor for an unhandled case - TODO - } - - [...] 
- std::visit([&](auto &&s) { genOMP(s); }) // generic - ``` - - This doesn't change any functionality, just reorganizes the functions a - bit. The intent here is to improve the readability of this file. - -commit de7a50fb88faa1dafee33f10149561936214062b -Author: jeanPerier -Date: Fri Mar 22 11:13:04 2024 +0100 - - [flang] Fix lowering of host associated cray pointee symbols (#86121) - - Cray pointee symbols can be host associated from a module or host - procedure while the related cray pointer is not explicitly associated. - This caused the "not yet implemented: lowering symbol to HLFIR" to fire - when lowering a reference to the cray pointee and fetching the cray - pointer. - - This patch: - - Ensures cray pointers are always instantiated when instantiating a - cray pointee. - - Fix internal procedure lowering to deal with cray pointee host - association like it does for pointers (the lowering strategy for cray - pointee is to create a pointer that is updated with the cray pointer - value before being fetched). - - This should fix the bug reported in - https://github.com/llvm/llvm-project/issues/85420. - -commit 2ab106cbd428984df3dda2f6983d5f956917cb69 -Author: Krzysztof Parzyszek -Date: Thu Mar 21 15:12:43 2024 -0500 - - [flang][OpenMP] Convert processTODO and remove unused objects (#81627) - - Remove `ClauseIterator2` and `clauses2` from ClauseProcessor. - - [Clause representation 5/6] - -commit 734026347cca85cf0e242ef5f04896f55e0ac113 -Author: Sergio Afonso -Date: Thu Mar 21 12:25:48 2024 +0000 - - Reapply "[Flang][OpenMP][Lower] NFC: Move clause processing helpers into the ClauseProcessor (#85258)" (#85807) - - This patch contains slight modifications to the reverted PR #85258 to - avoid issues with constructs containing multiple reduction clauses, - uncovered by a test on the gfortran testsuite. - - This reverts commit 9f80444c2e669237a5c92013f1a42b91b5609012. 
- -commit 84115494d6475e1aea3cdd1163d3a88243b75f36 -Author: Krzysztof Parzyszek -Date: Wed Mar 20 15:00:29 2024 -0500 - - [flang][Lower] Convert OMP Map and related functions to evaluate::Expr (#81626) - - The related functions are `gatherDataOperandAddrAndBounds` and - `genBoundsOps`. The former is used in OpenACC as well, and it was - updated to pass evaluate::Expr instead of parser objects. - - The difference in the test case comes from unfolded conversions of index - expressions, which are explicitly of type integer(kind=8). - - Delete now unused `findRepeatableClause2` and `findClause2`. - - Add `AsGenericExpr` that takes std::optional. It already returns - optional Expr. Making it accept an optional Expr as input would reduce - the number of necessary checks when handling frequent optional values in - evaluator. - - [Clause representation 4/6] - -commit d84252e064b3f35aa879c10e207f77e931f351d9 -Author: Sergio Afonso -Date: Wed Mar 20 11:19:38 2024 +0000 - - [MLIR][OpenMP] NFC: Uniformize OpenMP ops names (#85393) - - This patch proposes the renaming of certain OpenMP dialect operations with the - goal of improving readability and following a uniform naming convention for - MLIR operations and associated classes. 
In particular, the following operations - are renamed: - - - `omp.map_info` -> `omp.map.info` - - `omp.target_update_data` -> `omp.target_update` - - `omp.ordered_region` -> `omp.ordered.region` - - `omp.cancellationpoint` -> `omp.cancellation_point` - - `omp.bounds` -> `omp.map.bounds` - - `omp.reduction.declare` -> `omp.declare_reduction` - - Also, the following MLIR operation classes have been renamed: - - - `omp::TaskLoopOp` -> `omp::TaskloopOp` - - `omp::TaskGroupOp` -> `omp::TaskgroupOp` - - `omp::DataBoundsOp` -> `omp::MapBoundsOp` - - `omp::DataOp` -> `omp::TargetDataOp` - - `omp::EnterDataOp` -> `omp::TargetEnterDataOp` - - `omp::ExitDataOp` -> `omp::TargetExitDataOp` - - `omp::UpdateDataOp` -> `omp::TargetUpdateOp` - - `omp::ReductionDeclareOp` -> `omp::DeclareReductionOp` - - `omp::WsLoopOp` -> `omp::WsloopOp` - -commit 9f80444c2e669237a5c92013f1a42b91b5609012 -Author: Sergio Afonso -Date: Tue Mar 19 13:25:33 2024 +0000 - - Revert "[Flang][OpenMP][Lower] NFC: Move clause processing helpers into the ClauseProcessor (#85258)" - - Reverting due to failing gfortran test. - - This reverts commit 2f2f16f32bb2a6c250b19adbc229d9dc3b38640c. - -commit 2f2f16f32bb2a6c250b19adbc229d9dc3b38640c -Author: Sergio Afonso -Date: Tue Mar 19 11:49:45 2024 +0000 - - [Flang][OpenMP][Lower] NFC: Move clause processing helpers into the ClauseProcessor (#85258) - - This patch moves some code in PFT to MLIR OpenMP lowering to the - `ClauseProcessor` class. This is so that some behavior that is related - to certain clauses stays within the `ClauseProcessor` and it's not the - caller the one responsible for always doing this when the clause is - present. - -commit d671ebe46ce6bb542ab81ea120751c985f3fe4a3 -Author: Sergio Afonso -Date: Tue Mar 19 10:45:59 2024 +0000 - - [Flang][Lower] NFC: Replace SmallVector with more suitable alternatives (#85227) - - In this patch some uses of `llvm::SmallVector` in Flang's lowering to - MLIR are replaced by other types (i.e. 
`llvm::ArrayRef` and - `llvm::SmallVectorImpl`) which are intended for these uses. This - generally prevents relying on always passing small vectors with a - particular number of elements in the stack. - -commit f6a2a55ba1fe1a4b720b8760704785d12137b35e -Author: Valentin Clement (バレンタイン クレメン) -Date: Mon Mar 18 19:46:11 2024 -0700 - - [flang][cuda] Handle lowering of stars in cuf kernel launch parameters (#85695) - - Parsing of the cuf kernel loop directive has been updated to handle - variants with the * syntax. This patch updates the lowering to make use - of them. - - - If the grid or block syntax uses only stars then the operation - variadic operand remains empty. - - If there are values and stars, then stars are represented as a zero - constant value. - -commit 8a6a0f1954937341abd501529f3d7454937110a5 -Author: Valentin Clement (バレンタイン クレメン) -Date: Mon Mar 18 17:11:04 2024 -0700 - - [flang][cuda] Add proper TODO for cuda fortran assignment (#85705) - - Data transfer between host and device can be done with assignment - statements in CUDA Fortran. This is currently not lowered so adding a - proper TODO. - - - https://docs.nvidia.com/hpc-sdk/archive/24.3/compilers/cuda-fortran-prog-guide/index.html#cfref-data-trans-assgn-statemts - -commit 87cee71b3738547465481740fcbde7d73283678f -Author: Kareem Ergawy -Date: Mon Mar 18 10:44:44 2024 +0100 - - [flang][MLIR][OpenMP] Extend delayed privatization for scalar allocatables and pointers (#84740) - - One more step in extending support for delayed privatization. This diff - adds support for scalar allocatables and pointers.
- -commit 037a32a9a73286cf6e1bf439c61b03767658b564 -Author: Krzysztof Parzyszek -Date: Fri Mar 15 16:42:06 2024 -0500 - - [flang][OpenMP] Convert DataSharingProcessor to omp::Clause (#81629) - - [Clause representation 6/6] - -commit 60fa2b0670b874b702ddb9f81d098af692ea6875 -Author: Peter Klausler <35819229+klausler@users.noreply.github.com> -Date: Fri Mar 15 13:57:42 2024 -0700 - - [flang] Parse !$CUF KERNEL DO <<< (*) (#85338) - - Accept and represent asterisks within the parenthesized grid and block - specification lists. - -commit 63e70c05537c54edae975c8b5449ff87444abec2 -Author: Krzysztof Parzyszek -Date: Fri Mar 15 07:04:42 2024 -0500 - - [flang][OpenMP] Convert repeatable clauses (except Map) in ClauseProc… (#81623) - - …essor - - Rename `findRepeatableClause` to `findRepeatableClause2`, and make the - new `findRepeatableClause` operate on new `omp::Clause` objects. - - Leave `Map` unchanged, because it will require more changes for it to - work. - - [Clause representation 3/6] - -commit 096ee4e16fd62cd578d20ec4e8ad4756f4e369ee -Author: agozillon -Date: Wed Mar 13 16:18:21 2024 +0100 - - [Flang][OpenMP] Implement "promotion" of use_device_ptr non-cptr arguments to use_device_addr (#82834) - - This effectively implements some now deprecated OpenMP functionality - that some applications (most notably at the moment GenASiS) - unfortunately depend on (deprecated in specification version 5.2): - - "If a list item in a use_device_ptr clause is not of type C_PTR, the - behavior is as if the list item appeared in a use_device_addr clause. - Support for such list items in a use_device_ptr clause is deprecated." 
- - This PR downgrades the hard-error to a deprecated warning and "promotes" - the above cases by simply moving the offending operands from the - use_device_ptr value list to the back of the use_device_addr list (and - moves the related symbols, locs and types that form the BlockArgs - correspondingly) and then the generation of the target data construct - proceeds as normal. - -commit f46f5a01f4d5a7dcaf4a8fde5fc44eafdd9dbf27 -Author: Tom Eccles -Date: Wed Mar 13 14:51:09 2024 +0000 - - [flang][OpenMP][OMPIRBuilder][mlir] Optionally pass reduction vars by ref (#84304) - - Previously reduction variables were always passed by value into and out - of the initialization and combiner regions of the OpenMP reduction - declare operation. - - This worked well for reductions of primitive types (and might perform - better than passing by reference). But passing by reference will be - useful for array and derived type reductions (e.g. to move allocation - inside of the init region). - - Passing reductions by reference requires different LLVM-IR generation - when lowering from MLIR because some of the loads/stores/allocations - will now be moved inside of the init and combiner regions. This - alternate code generation is requested using a new attribute to - omp.wsloop and omp.parallel. - - Existing lowerings from mlir are unaffected (these will continue to use - the by-value argument passing). - - Flang will continue to use by-value argument passing for trivial types - unless a (hidden) command line argument is supplied. Non-trivial types - will always use the by-ref lowering. - - Array reductions are not ready yet (but are coming very soon). In the - meantime, this is tested by forcing existing reductions to use by-ref.
- - Commit series for by-ref OpenMP reductions 3/3 - - --------- - - Co-authored-by: Mats Petersson - -commit a4aac22683a44264bb3883242b1c6b711f534e8b -Author: harishch4 -Date: Tue Mar 12 20:04:35 2024 +0530 - - [Flang][OpenMp] Fix to threadprivate not working with host-association. (#74966) - - This patch considers host-associated variables to generate threadprivate - Ops. - - Fixes: #60763 #84561 - -commit 3b30559c088d679ca8fe491158e6c32db630f223 -Author: Kareem Ergawy -Date: Mon Mar 11 10:38:28 2024 +0100 - - [flang][OpenMP] Only use HLFIR base in privatization logic (#84123) - - Modifies the privatization logic so that the emitted code only used the - HLFIR base (i.e. SSA value `#0` returned from `hlfir.declare`). Before - that, that emitted privatization logic was a mix of using `#0` and `#1` - which leads to some difficulties trying to move to delayed privatization - (see the discussion on #84033). - -commit c03fd37d9b61bc6063e4d6e983846f877e83ac67 -Author: Anchu Rajendran S -Date: Thu Mar 7 08:23:58 2024 -0800 - - [flang] Changes to map variables in link clause of declare target (#83643) - - As per the OpenMP standard, "If a variable appears in a link clause on a - declare target directive that does not have a device_type clause with - the nohost device-type-description then it is treated as if it had - appeared in a map clause with a map-type of tofrom" is an implicit - mapping rule. Before this change, such variables were mapped as to by - default. 
- -commit afb05cd6469215232bd83e7cfbe59d2e1852567f -Author: agozillon -Date: Tue Mar 5 17:27:16 2024 +0100 - - [Flang][MLIR][OpenMP] Create a deferred declare target marking process for Bridge.cpp (#78502) - - This patch seeks to create a process that happens on module finalization - for OpenMP, in which a list of operations that had declare target - directives applied to them and were not generated at the time of - processing the original declare target directive are re-checked to apply - the appropriate declare target semantics. - - This works by maintaining a vector of declare target related data inside - of the FIR converter, in this case the symbol and the two relevant - unsigned integers representing the enumerators. This vector is added to - via a new function called from Bridge.cpp, insertDeferredDeclareTargets, - which happens prior to the processing of the directive (similarly to - getDeclareTargetFunctionDevice currently for requires), it effectively - checks if the Operation the declare target directive is applied to - currently exists, if it doesn't it appends to the vector. This is a - separate function to the processing of the declare target via the - overloaded genOMP as we unfortunately do not have access to the list - without passing it through every call, as the AbstractConverter we pass - will not allow access to it (I've seen no other cases of casting it to a - FirConverter, so I opted to not do that). - - The list is then processed at the end of the module in the - finalizeOpenMPLowering function in Bridge by calling a new function - markDelayedDeclareTargetFunctions which marks the latently generated - operations. In certain cases, some still will not be generated, e.g. if - an interface is defined, marked as declare target, but has no definition - or usage in the module then it will not be emitted to the module, so due - to these cases we must silently ignore when an operation has not been - found via its symbol.
- - The main use-case for this (although, I imagine there are others) is for - processing interfaces that have been declared in a module with a declare - target directive but do not have their implementation defined in the - same module. For example, inside of a separate C++ module that will be - linked in. In cases where the interface is called inside of a target - region it'll be marked as used on device appropriately (although, - realistically a user should explicitly mark it to match the - corresponding definition), however, in cases where it's used in a - non-clear manner through something like a function pointer passed to an - external call we require this explicit marking, which this patch adds - support for (currently will cause the compiler to crash). - - This patch also adds documentation on the declare target process and - mechanisms within the compiler currently. - -commit b585c43dccb2c608f698419a9c8d7645d3120fdb -Author: Kiran Chandramohan -Date: Tue Mar 5 10:28:36 2024 +0000 - - [Flang][OpenMP] : Add a temporary lowering for workshare directive (#78268) - - As a temporary solution, lower workshare to the single directive - -commit 5225901ecd53ba1e3f1519f3edea7d1aec15502d -Author: Peter Klausler <35819229+klausler@users.noreply.github.com> -Date: Thu Feb 29 13:02:39 2024 -0800 - - [flang] Add [[maybe_unused]] to fix -Werror build (#83456) - - Add the [[maybe_unused]] attribute to a variable in - lib/Lower/OpenMP/OpenMP.cpp to avoid a (possibly bogus) unused variable - warning when building with GCC 9.3.0. - -commit 06f775a82f6f562f8de75053f62c9c0dbeaa67d2 -Author: jeanPerier -Date: Wed Feb 28 14:30:29 2024 +0100 - - [flang] Give internal linkage to internal procedures (#81929) - - Internal procedures cannot be called directly from outside the host - procedure, so there is no point giving them external linkage. The only - reason flang did is because it is the default in MLIR.
- - Giving external linkage to them: - - prevents deleting them when not used/inlined by LLVM - - causes bugs with shared libraries (at least on linux x86-64) because - the call to the internal function could lead to a dynamic loader call - that would overwrite r10 register (the static chain pointer) due to - system calls and did not restore (it seems it does not expect r10 to be - used for PLT calls). - - This patch gives internal linkage to internal procedures: - - Note: the llvm.linkage attribute name cannot be obtained via a - getLinkageAttrName since it is not the same name as the one used in the - LLVM dialect. It is just a placeholder defined in - mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp until the func dialect - gets a real linkage model. So simply avoid hard coding it too many times - in lowering. - -commit 26b8be201e2d15867bb327a8008fffb3e34d42a5 -Author: Kareem Ergawy -Date: Wed Feb 28 10:15:57 2024 +0100 - - [flang][OpenMP][MLIR] Basic support for delayed privatization code-gen (#81833) - - Adds basic support for emitting delayed privatizers from flang. So far, - only types of symbols are supported (i.e. scalars), support for more - complicated types will be added later. This also makes sure that - reduction and delayed privatization work properly together by merging - the - body-gen callbacks for both in case both clauses are present on the - parallel construct. - -commit b3189b13b274a3411f939574aa573a7656bf372b -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue Feb 27 11:23:17 2024 -0800 - - [flang][cuda] CUF kernel loop directive (#82836) - - This patch introduces a new operation to represent the CUDA Fortran - kernel loop directive. This operation is modeled as a LoopLikeOp - operation in a similar way to acc.loop. - - The CUFKernelDoConstruct parse tree node is also placed correctly in the - PFTBuilder to be available in PFT evaluations. - - Lowering from the flang parse-tree to MLIR is also done. 
- -commit e50a231dcdd6aafa922b177b4fc4629bb7a10a79 -Author: Leandro Lupori -Date: Wed Feb 21 14:51:37 2024 -0300 - - [flang][OpenMP] Add support for copyprivate (#80485) - - Add initial handling of OpenMP copyprivate clause in Flang. - - When lowering copyprivate, Flang generates the copy function - needed by each variable and builds the appropriate - omp.single's CopyPrivateVarList. - - This is patch 3 of 4, to add support for COPYPRIVATE in Flang. - Original PR: https://github.com/llvm/llvm-project/pull/73128 - -commit 58f45d909d2a1565128846e423b480808736f214 -Author: Pranav Bhandarkar -Date: Wed Feb 21 11:28:25 2024 -0600 - - [flang][openmp] - depend clause support in target, target enter/update/exit data constructs (#81610) - - This patch adds support in flang for the depend clause in target and - target enter/update/exit constructs. Previously, the following line in a - fortran program would have resulted in the error shown below it. - - !$omp target map(to:a) depend(in:a) - - - "not yet implemented: Unhandled clause DEPEND in TARGET construct" - -commit 4d4af15c3fb671ed9f7eef9f29ebd6fde15618df -Author: Kareem Ergawy -Date: Wed Feb 21 15:55:42 2024 +0100 - - [NFC][flang][OpenMP] Split `DataSharing` and `Clause` processors (#81973) - - This started as an experiment to reduce the compilation time of - iterating over `Lower/OpenMP.cpp` a bit since it is too slow at the - moment. Trying to do that, I split the `DataSharingProcessor`, - `ReductionProcessor`, and `ClauseProcessor` into their own files and - extracted some shared code into a util file. All of these new `.h/.cpp` - files as well as `OpenMP.cpp` are now under a `Lower/OpenMP/` directory. - - This resulted in a slightly better organization of the OpenMP lowering - code and hence opening this NFC.
- - As for the compilation time, this unfortunately does not affect it much - (it shaves off a few seconds of `OpenMP.cpp` compilation) since from - what I learned the bottleneck is in `DirectivesCommon.h` and - `PFTBuilder.h` which both consume a lot of time in template - instantiation it seems. - -commit e769fb8699e3fa8e40623764f7713bfc783b0330 -Author: jeanPerier -Date: Thu Feb 15 09:06:42 2024 +0100 - - [flang] prevent legacy lowering from being called in pointer assignment (#81750) - - When doing a pointer assignment with an RHS that is an array section, - the code fell in the legacy lowering code even with HLFIR enabled. - Escape this old code when HLFIR is on. - - Should fix #80884. - -commit d1f510cca8e966bd1742bf17256bfec99dcdf229 -Author: Mats Petersson -Date: Tue Feb 13 14:32:26 2024 +0000 - - Fix warning by removing unused variable (#81604) - - Apparently, some compilers [correctly] warn that the variable that was - created prior to this change is unused. - - This removes the variable. - -commit 1af073a11cb2ae5a52205e66f33d0ec9bbcbb5e0 -Author: Krzysztof Parzyszek -Date: Mon Feb 12 19:15:55 2024 -0600 - - [flang][OpenMP] Pass semantics context to all generating functions in… (#81269) - - … lower - - The convention is to pass it after "symTable" if present, otherwise - after "converter": - - converter, symTable, semaCtx - - converter, semaCtx - - This makes the interfaces more uniform---some of these functions were - already taking the semantics context, while others were not. - - The context will be used in future patches. 
- -commit b2b3a5248540320e74347fcdaffbd148d1e9d494 -Author: Mats Petersson -Date: Fri Feb 9 18:05:51 2024 +0000 - - Skip compiler directives between OMP PARALLEL DO and the loop (#81021) - - This fixes a compilation error when code like this is presented to the - compiler: - - !$OMP PARALLEL DO - !DIR$ VECTOR ALIGNED - DO 20 i=1,N - a = a + 0.5 - 20 CONTINUE - - The directive itself is later ignored (with a warning that this is - happening), but because the compiler already errored out before that - point, it completely fails to compile this code. Other compilers accept - the code without complaints. - -commit b081e9d4cafe2563c513ed7b5ae3ced6d177b657 -Author: Daniel Chen -Date: Fri Feb 9 10:56:57 2024 -0500 - - [Flang] Fix NULLIFY statement that returns too early for multiple procedure pointer objects. (#81164) - - The current code that handles NULLIFY statement for procedure pointer - returns after the 1st object. - This PR is to remove the `return` so it can nullify multiple procedure - pointer objects. - -commit 0a45d172d3229074d414e1942d6bafa2b4ae9126 -Author: jeanPerier -Date: Mon Feb 5 10:12:33 2024 +0100 - - [flang] Do not instantiate runtime info globals in functions (#80447) - - Runtime globals are compiler generated globals injected in user scopes. - They are never referred to directly in lowering code, we only need the - fir.global for them. Yet lowering was creating hlfir.declare for them in - module procedures. In modern fortran apps, this blows up the generated - IR for nothing (Types with dozens of components, type bound procedures - and parents can create in the order of 10 000 runtime info globals to - describe them, if there are 100 module procedures, that is a few - million operations generated and processed in each pass for nothing). - -commit bd8bec27e25022b07ec7044654cd6a1efcd9704f -Author: Daniel Chen -Date: Wed Jan 31 11:24:17 2024 -0500 - - [Flang] Support NULL(procptr): null intrinsic that has procedure pointer argument. 
(#80072) - - This PR adds support for NULL intrinsic to have a procedure pointer - argument. - -commit 837bff11cb7d31f40805c73d4f539960a77eda33 -Author: Sergio Afonso -Date: Tue Jan 30 13:45:56 2024 +0000 - - [Flang][Lower] Attach target_cpu and target_features attributes to MLIR functions (#78289) - - This patch forwards the target CPU and features information from the - Flang frontend to MLIR func.func operation attributes, which are later - used to populate the target_cpu and target_features llvm.func - attributes. - - This is achieved in two stages: - - 1. Introduce the `fir.target_cpu` and `fir.target_features` module - attributes with information from the target machine immediately after - the initial creation of the MLIR module in the lowering bridge. - - 2. Update the target rewrite flang pass to get this information from the - module and pass it along to all func.func MLIR operations, respectively - as attributes named `target_cpu` and `target_features`. These attributes - will be automatically picked up during Func to LLVM dialect lowering and - used to initialize the corresponding llvm.func named attributes. - - The target rewrite and FIR to LLVM lowering passes are updated with the - ability to override these module attributes, and the `CodeGenSpecifics` - optimizer class is augmented to make this information available to - target-specific MLIR transformations. - - This completes a full flow by which target CPU and features make it all - the way from compiler options to LLVM IR function attributes. - -commit 181eab27d244b9a9eb32d6716f9c38f7f3723356 -Author: jeanPerier -Date: Mon Jan 29 18:28:56 2024 +0100 - - [flang] Set KIND in compiler generated COUNT for SIZE(PACK) (#79801) - - Compiler was rewriting SIZE(PACK(x, MASK)) to COUNT(MASK). 
It was - wrapping the COUNT call without a KIND argument (leading to INTEGER(4) - result in the characteristics) in an Expr (implying - INTEGER(8) result), this led to inconsistencies that later hit verifier - errors in lowering. - - Set the KIND argument to the KIND of ExtentType to ensure the built - expression is consistent. - - This requires giving access to some safe place where the "kind" name can - be saved and turned into a CharBlock (count has a DIM argument that - requires using the KIND keyword here). For the FoldingContext that belongs - to SemanticsContext, this is the same string set as the one used by - SemanticsContext for similar purposes. - -commit 5062a178bf9dd46008b8f7a182facb6152c46889 -Author: Valentin Clement (バレンタイン クレメン) -Date: Mon Jan 22 10:31:37 2024 -0800 - - [flang][openacc] Lower loop directive to the new acc.loop op design (#65417) - - acc.loop was redesigned in https://reviews.llvm.org/D159229. This patch - updates the lowering to match the new op. - - DO CONCURRENT construct will be added in a follow up patch. - - Note that the pre-commit ci will fail until D159229 is merged. - - Depends on #67355 - -commit c5a9e354379d29ee763e9982faf57398789c8d5b -Author: Krzysztof Parzyszek -Date: Mon Jan 15 08:01:41 2024 -0600 - - [Flang][OpenMP] Push genEval calls to individual operations, NFC (#77758) - - Introduce `genNestedEvaluations` that will lower all evaluations nested - in the given, accounting for a potential COLLAPSE directive. - - Recursive lowering [2/5] - -commit a2d7af757bc33dc91f2e038742915a146cfb0c13 -Author: Katherine Rasmussen -Date: Tue Jan 2 10:40:47 2024 -0800 - - [flang] Add notify-type and notify-wait-stmt (#76594) - - Add `notify-type` to `iso_fortran_env` module. Add `notify-wait-stmt` to - the parser and add checks for constraints on the statement, `C1177` and - `C1178`, from the Fortran 2023 standard. Add three semantics tests for - `notify-wait-stmt`. 
- -commit c373f58134997a6d037f0143f13f97451278700f -Author: jeanPerier -Date: Tue Dec 19 17:17:09 2023 +0100 - - [flang] Lower procedure pointer components (#75453) - - Lower procedure pointer components, except in the context of structure - constructor (left TODO). - - Procedure pointer components lowering share most of the lowering logic - of procedure pointers with the following particularities: - - They are components, so an hlfir.designate must be generated to - retrieve the procedure pointer address from its derived type base. - - They may have a PASS argument. While there is no dispatching as with - type bound procedure, special care must be taken to retrieve the derived - type component base in this case since semantics placed it in the - argument list and not in the evaluate::ProcedureDesignator. - - These components also bring a new level of recursive MLIR types since a - fir.type may now contain a component with an MLIR function type where - one of the arguments is the fir.type itself. This required moving the - "derived type in construction" stack to the converter so that the object - and function type lowering utilities share the same state (currently the - function type utility would end-up creating a new stack when lowering its - arguments, leading to infinite loops). The BoxedProcedurePass also - needed an update to deal with this recursive aspect. 
- -commit 82e91b91ca0ceab5ee977295540643ce67153f89 -Author: Krzysztof Parzyszek -Date: Fri Dec 15 09:32:57 2023 -0600 - - [flang][OpenMP] Move handling of OpenMP symbol flags to OpenMP.cpp (#75523) - - The function `instantiateVariable` in Bridge.cpp has the following code: - ``` - if (var.getSymbol().test( - Fortran::semantics::Symbol::Flag::OmpThreadprivate)) - Fortran::lower::genThreadprivateOp(*this, var); - - if (var.getSymbol().test( - Fortran::semantics::Symbol::Flag::OmpDeclareTarget)) - Fortran::lower::genDeclareTargetIntGlobal(*this, var); - ``` - - Implement `handleOpenMPSymbolProperties` in OpenMP.cpp, move the above - code there, and have `instantiateVariable` call this function instead. - - This would further separate OpenMP-related details into OpenMP.cpp. - -commit aeb482106c03cb05025f904db69c65dbcfa745fe -Author: Krzysztof Parzyszek -Date: Fri Dec 15 09:01:08 2023 -0600 - - [flang][OpenMP] Move nested eval conversion to OpenMP.cpp, NFC (#75502) - - This is the first step towards exploiting `genEval` functionality from - inside of OpenMP-generating functions. - - This follows discourse discussion: - https://discourse.llvm.org/t/openmp-lowering-from-pft-to-fir/75263 - -commit fedc54bf35b378ab3418ba0f36c1df476aef5aca -Author: Valentin Clement (バレンタイン クレメン) -Date: Thu Dec 14 09:25:27 2023 -0800 - - [flang] Add genEval to the AbstractConverter (#75140) - - There was some discussion on discourse[1] about allowing call to FIR - generation functions from other part of lowering belonging to OpenMP. - - This solution exposes a simple `genEval` member function on the - `AbstractConverter` so that IR generation for PFT Evaluation objects can - be called from lowering outside of the FirConverter but not exposing it. 
- - [1] https://discourse.llvm.org/t/openmp-lowering-from-pft-to-fir/75263 - -commit e59e848805f57bd52ebbb0f7f7d4d951e6af597c -Author: jeanPerier -Date: Wed Dec 6 14:20:06 2023 +0100 - - [flang] Updating drivers to create data layout before semantics (#73301) - - Preliminary patch to change lowering/code generation to use - llvm::DataLayout information instead of generating "sizeof" GEP (see - https://github.com/llvm/llvm-project/issues/71507). - - Fortran Semantic analysis needs to know about the target type size and - alignment to deal with common blocks, and intrinsics like - C_SIZEOF/TRANSFER. This information should be obtained from the - llvm::DataLayout so that it is consistent during the whole compilation - flow. - - This change is changing flang-new and bbc drivers to: - 1. Create the llvm::TargetMachine so that the data layout of the target - can be obtained before semantics. - 2. Sharing bbc/flang-new set-up of the - SemanticConstext.targetCharateristics from the llvm::TargetMachine. For - now, the actual part that set-up the Fortran type size and alignment - from the llvm::DataLayout is left TODO so that this change is mostly an - NFC impacting the drivers. - 3. Let the lowering bridge set-up the mlir::Module datalayout attributes - since it is doing it for the target attribute, and that allows the llvm - data layout information to be available during lowering. - - For flang-new, the changes are code shuffling: the `llvm::TargetMachine` - instance is moved to `CompilerInvocation` class so that it can be used - to set-up the semantic contexts. `setMLIRDataLayout` is moved to - `flang/Optimizer/Support/DataLayout.h` (it will need to be used from - codegen pass for fir-opt target independent testing.)), and the code - setting-up semantics targetCharacteristics is moved to - `Tools/TargetSetup.h` so that it can be shared with bbc. 
- - As a consequence, LLVM targets must be registered when running - semantics, and it is not possible to run semantics for a target that is - not registered with the -triple option (hence the power pc specific - modules can only be built if the PowerPC target is available. - -commit 3aba9264b38c1aa3a991065305c0a04988432692 -Author: vdonaldson <37090318+vdonaldson@users.noreply.github.com> -Date: Mon Dec 4 09:55:54 2023 -0800 - - [flang] IEEE_ARITHMETIC and IEEE_EXCEPTIONS intrinsic module procedures (#74138) - - Implement a selection of intrinsic module procedures that involve - exceptions. - - - IEEE_GET_FLAG - - IEEE_GET_HALTING_MODE - - IEEE_GET_MODES - - IEEE_GET_STATUS - - IEEE_LOGB - - [f23] IEEE_MAX, IEEE_MAX_MAG, IEEE_MAX_NUM, IEEE_MAX_NUM_MAG - - [f23] IEEE_MIN, IEEE_MIN_MAG, IEEE_MIN_NUM, IEEE_MIN_NUM_MAG - - IEEE_QUIET_EQ, IEEE_QUIET_GE, IEEE_QUIET_GT, - - IEEE_QUIET_LE, IEEE_QUIET_LT, IEEE_QUIET_NE - - IEEE_SET_FLAG - - IEEE_SET_HALTING_MODE - - IEEE_SET_MODES - - IEEE_SET_STATUS - - IEEE_SIGNALING_EQ, IEEE_SIGNALING_GE, IEEE_SIGNALING_GT, - - IEEE_SIGNALING_LE, IEEE_SIGNALING_LT, IEEE_SIGNALING_NE - - IEEE_SUPPORT_FLAG - - IEEE_SUPPORT_HALTING - -commit dd376f859526d9023c879e880f380158050daa5b -Author: Krzysztof Parzyszek -Date: Mon Dec 4 08:27:57 2023 -0600 - - [flang] Fix move-assign operator for struct IncrementLoopInfo (#74137) - -commit a9a5af82704d772509ccef87991384f47b65884d -Author: Valentin Clement (バレンタイン クレメン) -Date: Thu Nov 30 14:25:03 2023 -0800 - - [flang][openacc] Support early return in acc.loop (#73841) - - Early return is accepted in OpenACC loop not directly nested in a - compute construct. Since acc.loop operation has a region, the - `func.return` operation cannot be directly used inside the region. - An early return is materialized by an `acc.yield` operation returning a - `true` value. The standard end of the `acc.loop` region yield a `false` - value in this case. 
- A conditional branch operation on the `acc.loop` result will branch to - the `finalBlock` or just to the continue block whether an early exit was - produce in the acc.loop. - -commit 0ccef6a723cbfe3d72b85d34963badb37a6a9a53 -Author: Mats Petersson -Date: Wed Nov 29 16:15:43 2023 +0000 - - [flang] Make adapt.valuebyref attribute work again (#73658) - - This got "lost" in the HLFIR transformation. This patch applies the old - attribute to the AssociateOp that needs it, and forwards it to the - AllocaOp that is generated when lowering to FIR. - -commit af09219edd87db860d1fc5a33dd49ecd31291699 -Author: Daniel Chen -Date: Wed Nov 22 11:51:12 2023 -0500 - - [Flang] Add partial support for lowering procedure pointer assignment. (#70461) - - **Scope of the PR:** - 1. Lowering global and local procedure pointer declaration statement - with explicit or implicit interface. The explicit interface can be from - an interface block, a module procedure or an internal procedure. - 2. Lowering procedure pointer assignment, where the target procedure - could be external, module or internal procedures. - 3. Lowering reference to procedure pointers so that it works end to end. - - **PR notes:** - 1. The first commit of the PR does not include testing. I would like to - collect some comments first, which may alter the output. Once I confirm - the implementation, I will add some testing as a follow up commit to - this PR. - 2. No special handling of the host-associated entities when an internal - procedure is the target of a procedure pointer assignment in this PR. - - **Implementation notes:** - 1. The implementation is using the HLFIR path. - 2. Flang currently uses `getUntypedBoxProcType` to get the - `fir::BoxProcType` for `ProcedureDesignator` when getting the address of - a procedure in order to pass it as an actual argument. This PR inherits - the same design decision for procedure pointer as the `fir::StoreOp` - requires the same memory type. 
- - Note: this commit is actually resubmitting the original commit from - PR #70461 that was reverted. See PR #73221. - -commit 49f55d107548a340992eaec1b9767c0f8fc443cd -Author: Muhammad Omair Javaid -Date: Thu Nov 23 12:29:35 2023 +0500 - - Revert "[Flang] Add partial support for lowering procedure pointer assignment. (#70461)" - - This reverts commit e07fec10ac208c2868a24c5c0be88e45778b297e. - - This change appears to have broken following buildbots: - https://lab.llvm.org/buildbot/#/builders/176 - https://lab.llvm.org/buildbot/#/builders/179 - https://lab.llvm.org/buildbot/#/builders/184 - https://lab.llvm.org/buildbot/#/builders/197 - https://lab.llvm.org/buildbot/#/builders/198 - - All bots fails in testsuite where following tests seems broken: - (eg: https://lab.llvm.org/buildbot/#/builders/176/builds/7131) - - test-suite::gfortran-regression-compile-regression__proc_ptr_46_f90.test - test-suite::gfortran-regression-compile-regression__proc_ptr_37_f90.test - -commit e07fec10ac208c2868a24c5c0be88e45778b297e -Author: Daniel Chen -Date: Wed Nov 22 11:51:12 2023 -0500 - - [Flang] Add partial support for lowering procedure pointer assignment. (#70461) - - **Scope of the PR:** - 1. Lowering global and local procedure pointer declaration statement - with explicit or implicit interface. The explicit interface can be from - an interface block, a module procedure or an internal procedure. - 2. Lowering procedure pointer assignment, where the target procedure - could be external, module or internal procedures. - 3. Lowering reference to procedure pointers so that it works end to end. - - **PR notes:** - 1. The first commit of the PR does not include testing. I would like to - collect some comments first, which may alter the output. Once I confirm - the implementation, I will add some testing as a follow up commit to - this PR. - 2. No special handling of the host-associated entities when an internal - procedure is the target of a procedure pointer assignment in this PR. 
- - **Implementation notes:** - 1. The implementation is using the HLFIR path. - 2. Flang currently uses `getUntypedBoxProcType` to get the - `fir::BoxProcType` for `ProcedureDesignator` when getting the address of - a procedure in order to pass it as an actual argument. This PR inherits - the same design decision for procedure pointer as the `fir::StoreOp` - requires the same memory type. - -commit a3700cc29da8fc48361256609bc0903ff94106c7 -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue Nov 14 14:42:11 2023 -0800 - - [flang][openacc] Make implicit declare region unstructured (#71591) - - Using an op with a region cause some issue with unstructured code. This - patch make use of acc.declare_enter and acc.declare_exit to represent - the implicit declare region. - -commit 1c91d9bdea3b6c38e8fbce46ec8181a9c0aa26f8 -Author: Peter Klausler <35819229+klausler@users.noreply.github.com> -Date: Mon Nov 13 16:13:50 2023 -0800 - - [flang] Ensure that portability warnings are conditional (#71857) - - Before emitting a warning message, code should check that the usage in - question should be diagnosed by calling ShouldWarn(). A fair number of - sites in the code do not, and can emit portability warnings - unconditionally, which can confuse a user that hasn't asked for them - (-pedantic) and isn't terribly concerned about portability *to* other - compilers. - - Add calls to ShouldWarn() or IsEnabled() around messages that need them, - and add -pedantic to tests that now require it to test their portability - messages, and add more expected message lines to those tests when - -pedantic causes other diagnostics to fire. 
- -commit 91f92e6a6bfa4a1d963234ba70adb5a7957aeb1e -Author: NimishMishra <42909663+NimishMishra@users.noreply.github.com> -Date: Fri Nov 10 00:17:47 2023 -0800 - - [flang][OpenMP] Fix common block missing symbol crash (#67330) - - Fixes #65034 by skipping copy of host-association information if the - concerned symbol is missing from the inner construct - -commit d0ef94bc83019f7cb92a33b545782294ffdcfd04 -Author: Kiran Chandramohan -Date: Tue Nov 7 11:53:30 2023 +0000 - - Revert "Revert "[Flang][OpenMP] Fix to support privatisation of alloc strings (#71204)"" - - This reverts commit ba116ff41d525a4b6c931664f1b4437a7dd55b1d. - - This relands https://github.com/llvm/llvm-project/pull/71204 with a fix - in the test. - -commit ba116ff41d525a4b6c931664f1b4437a7dd55b1d -Author: Kiran Chandramohan -Date: Tue Nov 7 11:50:24 2023 +0000 - - Revert "[Flang][OpenMP] Fix to support privatisation of alloc strings (#71204)" - - This reverts commit 192bee0db1dfb8b2f570031102a2326037b75fa1. - Reverting to fix the CI. https://lab.llvm.org/buildbot/#/builders/21/builds/84995 - -commit 192bee0db1dfb8b2f570031102a2326037b75fa1 -Author: Kiran Chandramohan -Date: Tue Nov 7 11:40:25 2023 +0000 - - [Flang][OpenMP] Fix to support privatisation of alloc strings (#71204) - -commit 7046202c3dde093420c08e40116568e76a48ee59 -Author: jeanPerier -Date: Fri Oct 27 09:07:48 2023 +0200 - - [flang] Move whole allocatable assignment implicit conversion to lowering (#70317) - - The front-end is making implicit conversions explicit in assignment and - structure constructors. - - While this generally helps and is needed by semantics to fold structure - constructors correctly, this is incorrect when the LHS or component is - an allocatable. The RHS may have non default lower bounds that should be - propagated to the LHS, and making the conversion explicit changes the - semantics. 
In the structure constructor, the situation is even worse - since Fortran 2018 7.5.10 point 7 allows the value to be a reference to - an unallocated allocatable, and adding an explicit conversion in - semantics will cause a segfault. - - This patch removes the explicit convert in semantics when the - LHS/component is a whole allocatable, and update lowering to deal with - the conversion insertion, dealing with preserving the lower bounds and - the tricky structure constructor case. - -commit b6b0756ce5c4e2e07d7f6f1f430d3d29afe9a8a8 -Author: jeanPerier -Date: Wed Oct 25 09:22:23 2023 +0200 - - [flang] Allow lowering of sub-expressions to be overridden (#69944) - - OpenACC/OpenMP atomic lowering needs a finer control over expression - lowering. This patch allows mapping evaluate::Expr to mlir::Value so - that any subsequent expression lowering will use these values when an - operand is a mapped Expr. - - This is an alternative to - https://github.com/llvm/llvm-project/pull/69866 From which I took the - test and some of the logic to extract the non-atomic sub-expression. - - --------- - - Co-authored-by: Nimish Mishra - -commit 828674395b1997c01acd9c560646d909b9cc3615 -Author: Valentin Clement (バレンタイン クレメン) -Date: Tue Oct 24 09:17:48 2023 -0700 - - [flang][openacc] Allow acc routine at the top level (#69936) - - Some compilers allow the `$acc routine()` to be placed at the - program unit level. To be compatible, this patch enables the use of acc - routine at this level. These acc routine directives must have a name. - -commit 2ef370b7716b39390736e181d2eaabd740e1d59d -Author: jeanPerier -Date: Fri Oct 20 11:11:52 2023 +0200 - - [flang][openmp] Update copyHostAssociateVar to use hlfir.assign for HLFIR (#69441) - - The code in `copyHostAssociateVar` is using `createSomeArrayAssignment` - for arrays which is using the soon legacy expression lowering. Update - the copy to use hlfir.assign instead. 
- - I used the temporary_lhs flag to mimic the current behavior, but maybe - user defined assignment should be called when needed .This flag also - prevents any finalizers to be called on the LHS if the LHS type has - finalizers (which would occur otherwise in normal intrinsic assignment). - Again, I am not sure what the OpenMP spec wants here. - - Also, I added special handling for ALLOCATABLE, the current code seems - broken to me since it is basically copying the descriptor which would - lead to memory leak given the TEMP was previously allocated with the - shape of the variable in createHostAssociateVarClone. So copying the - DATA instead seemed like the right thing to do. - -commit bfcd05317d0fbe90474eda13a4dbf33c2cee4130 -Author: jeanPerier -Date: Tue Oct 17 09:11:53 2023 +0200 - - [flang][hlfir] Do not emit extra declare for dummy used in BLOCK (#69184) - - When a variable is used in a specification expression in a scope, it is - added to the list of variables that must be instantiated when lowering - the scope. When lowering a BLOCK, this caused instantiateVar to be - called again on all the host block variables appearing in block variable - specification expressions. This caused an extra declare to be emitted - for dummy inside block (for non dummy, instantiateVar is a no-op if the - symbol is already mapped). - - Only call instantiateVar if the symbol is not mapped when lowering BLOCK - variables. - -commit 5db4779c3f07b6f562339722c176fb58329652ac -Author: Pete Steinfeld <47540744+psteinfeld@users.noreply.github.com> -Date: Mon Oct 16 12:37:57 2023 -0700 - - [flang] Regularize TODO messages for coarray related features (#69227) - - I want to make "not yet implemented" messages for features related to - coarrays easy to identify and make them easy for users to read. 
- -commit 4ccd57ddb11e833f6b2ec2188e73c4ef3a5ab80e -Author: jeanPerier -Date: Fri Oct 6 09:29:57 2023 +0200 - - [flang][nfc] replace fir.dispatch_table with more generic fir.type_info (#68309) - - The goal is to progressively propagate all the derived type info that is - currently in the runtime type info globals into a FIR operation that can - be easily queried and used by FIR/HLFIR passes. - - When this will be complete, the last step will be to stop generating the - runtime info global in lowering, but to do that later in or just before - codegen to keep the FIR files readable (on the added type-info.f90 - tests, the lowered runtime info globals take a whopping 2.6 million - characters on 1600 lines of the FIR textual output. The fir.type_info that - contains all the info required to generate those globals for such - "trivial" types takes 1721 characters on 9 lines). - - So far this patch simply starts by replacing the fir.dispatch_table - operation by the fir.type_info operation and to add the noinit/ - nofinal/nodestroy flags to it. These flags will soon be used in HLFIR to - better rewrite hlfir.assign with derived types. - -commit 43d2ef2856fc3373068c020efa11a933477e11fa -Author: jeanPerier -Date: Tue Sep 26 20:33:01 2023 +0200 - - [flang][lowering] propagate location info of macro expansions (#67446) - - Currently flang-new -g is failing when compiling code containing a call - in a macro to a function defined in the same file. - - The verification added in https://reviews.llvm.org/D157447 is valid, - flang lowering was failing to propagate location information in code - from macro expansion because GetSourcePositionRange does not work with - them (it fails to come up with an end location), but we do not need a range - for the MLIR location, only the start. - - Use GetSourcePosition instead that works with code from macro expansion. 
- - Note that the source location is the one of the statement where the - macro appeared, if needed some FusedLocation could be later built to - keep a link to the macro location in the debug info. - -commit 2cb31fe8ea7bbe3c4fce0f03f8126341a353d01b -Author: jeanPerier -Date: Thu Sep 21 18:38:23 2023 +0200 - - [flang] Centralize automatic deallocation code in lowering (#67003) - - There are currently several places that automatically deallocate - allocatables if they are allocated: - - INTENT(OUT) allocatable are deallocated on entry in the callee - - INTENT(OUT) allocatable are also deallocated on the caller side of - BIND(C) function in case the implementation is in C. - - Results of function returning allocatable are deallocated after usage. - - OPENMP privatized allocatable are deallocated at the end of OPENMP - region. - - Introduce genDeallocateIfAllocated that centralizes all this code, except - for the function return that uses genFreememIfAllocated since - finalization is done separately currently. - - `fir::factory::genFinalization` and - `fir::factory::genInlinedDeallocation` are removed and replaced by - genFreemem since their names were misleading: finalization was not - called. - - There is a fallout in the tests because previous generated code did not - check the allocated status when doing inline deallocation. This was OK - since free(null) is guaranteed to be a no-op, but this makes compiler - code more complex, is a bit surprising in the generated IR IMHO, and it - relied on knowing when genDeallocateBox inserts runtime calls or uses - inlined code. - -commit 3dbb055f54e705d125e1fd30db463e7aff8bbeff -Author: Leandro Lupori -Date: Thu Sep 21 15:59:35 2023 +0200 - - [flang] Generate valid IR on GOTO DO body (#66084) - - Flang was generating invalid IR when there was a GOTO to the body - of a DO loop. 
This happened because the value of step, computed at - the beginning of the loop, was being reused at the end of the loop, - that, for unstructured loops, is in another basic block. Because of - this, a GOTO could skip the beginning of the loop, that defined - step, and yet try to use it at the end of the loop, which is - invalid. - - Instead of reusing the step value, it can be recomputed if it is a - constant, or stored and loaded to/from a temporary variable, for - non-constant step expressions. - - Note that, while this change prevents the generation of invalid IR - on the presence of jumps to DO loop bodies, what happens if the - program reaches the end of a DO loop without ever passing through - its beginning is undefined behavior, as some control variables, - such as trip, will be uninitialized. It doesn't seem worth the - effort and overhead to ensure this legacy extension will behave - correctly in this case. This is consistent with at least gfortran, - that doesn't behave correctly if step is not equal to one. - - Fixes: https://github.com/llvm/llvm-project/issues/65036 - -commit 8fde6f41a0e5a2b280e46521ed2236fab5c03412 -Author: Andrew Gozillon -Date: Tue Sep 19 08:00:40 2023 -0500 - - [Flang][OpenMP] Add lowering from PFT to new MapEntry and Bounds operations and tie them to relevant Target operations - - This patch builds on top of a prior patch in review which adds a new map - and bounds operation by modifying the OpenMP PFT lowering to support - these operations and generate them from the PFT. - - A significant amount of the support for the Bounds operation is borrowed - from OpenACC's own current implementation and lowering, just ported - over to OpenMP. - - The patch also adds very preliminary/initial support for lowering to - a new Capture attribute, which is stored on the new Map Operation, - which helps the later lowering from OpenMP -> LLVM IR by indicating - how a map argument should be handled. 
This capture type will - influence how a map argument is accessed on device and passed by - the host (different load/store handling etc.). It is reflective of a - similar piece of information stored in the Clang AST which performs a - similar role. - - As well as some minor adjustments to how the map type (map bitshift - which dictates to the runtime how it should handle an argument) is - generated to further support more use-cases for future patches that - build on this work. - - Finally it adds the map entry operation creation and tying it to the relevant - target operations as well as the addition of some new tests and alteration - of previous tests to support the new changes. - - Depends on D158732 - - reviewers: kiranchandramohan, TIFitis, clementval, razvanlupusoru - - Differential Revision: https://reviews.llvm.org/D158734 - -commit 47025af6399aa29a045275349b04aaffaa918d1b -Author: Slava Zakharin -Date: Mon Sep 18 09:59:06 2023 -0700 - - [flang][hlfir] Alias analysis for host associated accesses. (#65919) - - This patch adds `host_assoc` attribute for operations that implement - FortranVariableInterface (e.g. `hlfir.declare`). The attribute is used - by the alias analysis to make better conclusions about memory overlap. - For example, a dummy argument of an inner subroutine and a host's - variable used inside the inner subroutine cannot refer to the same - object (if the dummy argument does not satisfy exceptions in F2018 - 15.5.2.13). - This closes a performance gap between HLFIR optimization pipeline - and FIR ArrayValueCopy for Polyhedron/nf. - -commit 99a54b839a441a6e9dd9106c5fd9e547cf1309e5 -Author: jeanPerier -Date: Mon Sep 18 14:59:56 2023 +0200 - - [flang] Lower PRIVATE component names safely (#66076) - - It is possible for a derived type extending a type with private - components to define components with the same name as the private - components. 
- - This was not properly handled by lowering where several fir.record type - component names could end-up being the same, leading to bad generated - code (only the first component was accessed via fir.field_index, leading - to bad generated code). - - This patch handles the situation by adding the derived type mangled name - to private component. - -commit 29aa749087be38d3e5a3a37e0b8e8ab74e9f79aa -Author: Sergio Afonso -Date: Wed Mar 29 18:13:48 2023 +0100 - - [OpenMP][Flang][MLIR] Lowering of OpenMP requires directive from parse tree to MLIR - - This patch implements the lowering of the OpenMP 'requires' directive - from Flang parse tree to MLIR attributes attached to the top-level - module. - - Target-related 'requires' clauses are gathered and combined for each top-level - unit during semantics. Lastly, a single module-level `omp.requires` attribute - is attached to the MLIR module with that information at the end of the process. - - The `atomic_default_mem_order` clause is not addressed by this patch, but - rather it will come as a separate patch and follow a different approach. - - Depends on D147214, D150328, D150329 and D157983. - - Differential Revision: https://reviews.llvm.org/D147218 - -commit e070ea47a991d2b4a135f6bfb761b19013d7f6af -Author: Razvan Lupusoru -Date: Mon Sep 11 13:58:10 2023 -0700 - - [flang][openacc] Enable lowering support for OpenACC atomic operations (#65776) - - Since the OpenACC atomics specification is a subset of OpenMP atomics, - the same lowering implementation can be used. This change extracts out - the necessary pieces from the OpenMP lowering and puts them in a shared - spot. The shared spot is a header file so that each implementation can - template specialize directly. - - After putting the OpenMP implementation in a common spot, the following - changes were needed to make it work for OpenACC: - * Ensure parsing works correctly by avoiding hardcoded offsets. - * Templatize based on atomic type. 
- * The checking whether it is OpenMP or OpenACC is done by checking for - OmpAtomicClauseList (OpenACC does not implement this so we just - templatize with void). It was preferable to check this instead of atomic - type because in some cases, like atomic capture, the read/write/update - implementations are called - and we want compile time evaluation of - these conditional parts. - * The memory order and hint are used only for OpenMP. - * Generate acc dialect operations instead of omp dialect operations. - -commit 6ffea74f7c2cda7de91879a771daa6d45da198d9 -Author: jeanPerier -Date: Fri Sep 8 10:43:55 2023 +0200 - - [flang] Use BIND name, if any, when consolidating common blocks (#65613) - - This patch changes how common blocks are aggregated and named in - lowering in order to: - - * fix one obvious issue where BIND(C) and non BIND(C) with the same - Fortran name were "merged" - - * go further and deal with a derivative where the BIND(C) C name matches - the assembly name of a Fortran common block. This is a bit unspecified - IMHO, but gfortran, ifort, and nvfortran "merge" the common block - without complaints as a linker would have done. This required getting - rid of all the common block mangling early in FIR (\_QC) instead of - leaving that to the phase that emits LLVM from FIR because BIND(C) - common blocks did not have mangled names. Care has to be taken to deal - with the underscoring option of flang-new. - - See added flang/test/Lower/HLFIR/common-block-bindc-conflicts.f90 for an - illustration. - -commit 20f4a5a313e58b15bdbf74c4773931c6baa96884 -Author: Valentin Clement (バレンタイン クレメン) -Date: Thu Sep 7 14:54:38 2023 -0700 - - [flang][openacc][NFC] Clean up lowering api (#65678) - - Remove unused argument `pft::Evaluation` from higher level lowering API. - -commit f8843efbb2190db85c696001ffd6211a2c20ac37 -Author: Slava Zakharin -Date: Thu Sep 7 11:41:22 2023 -0700 - - [flang][hlfir] Lower Cray pointee references. 
(#65563) - - A Cray pointee reference must be done using the characteristics - (bounds, type params) of the original pointee declaration, but - using the actual address value of the associated Cray pointer. - There might be multiple Cray pointees associated with the same - Cray pointer. - - The proposed solution is to lower each Cray pointee into a POINTER - variable with a descriptor. The descriptor is initialized at the point - of declaration of the pointee, though its base_addr is set to null. - Before each reference of the Cray pointee its descriptor's base_addr - is updated to the current value of the Cray pointer. - - The update of the base_addr is done using PointerAssociateScalar - runtime call, which just updates the base_addr of the descriptor. - This is a temporary solution just to make Cray pointers work - to the same extent they work with FIR lowering. - -commit d26c78b2ad5ed0f3384d7a3ef4b4d894f2b1be3e -Author: jeanPerier -Date: Wed Sep 6 09:07:45 2023 +0200 - - [flang] handle indirect module variable use in internal procedure (#65324) - - When a module variable is referenced inside an internal procedure, but - the use statement for the module is inside the host, semantics may not - create any symbols with HostAssocDetails directly under the internal - procedure scope. - So pft::getScopeVariableList, that is called in the bridge when lowering - the internal procedure scope, failed to instantiate the module - variables. This lead to "symbol is not mapped to any IR value" compile - time errors. - - This patch fixes the issue by adding the variables to the list of - "captured" global variables from the host program, so that they are - instantiated as part of the `internalProcedureBindings` in the bridge. 
- - The rational of doing it that way instead of changing - `getScopeVariableList` is that `getScopeVariableList` would have to - import all the module variables used inside the host since it cannot - know which ones are referenced inside the internal procedure from the - semantics::Scope information. The fix in this patch only instantiates - the module variables from the host that are actually referenced inside - the internal procedure. - -commit de8939ffca277670613904872e55a9a4d9f19b94 -Author: Slava Zakharin -Date: Tue Sep 5 10:26:16 2023 -0700 - - [flang] Reset lbounds for allocatable function results. (#65286) - - With HLFIR the lbounds for the ALLOCATABLE result are taken from the - mutable box created for the result, so the non-default lbounds might be - propagated further causing incorrect result, e.g.: - ``` - program p - real, allocatable :: p5(:) - allocate(p5, source=real_init()) - print *, lbound(p5, 1) ! must print 1, but prints 7 - contains - function real_init() - real, allocatable :: real_init(:) - allocate(real_init(7:8)) - end function real_init - end program p - ``` - - With FIR lowering the box passed for `source` has explicit lower bound 1 - at the call site, but the runtime box initialized by `real_init` call - still has lower bound 7. I am not sure if the runtime box initialized by - `real_init` will ever be accessed in a debugger via Fortran variable - names, but I think that having the right runtime bounds that can be - accessible via examining registers/stack might be good in general. So I - decided to update the runtime bounds at the point of return. - - This change fixes the test above for HLFIR. 
- - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D156187 - -commit 90f58eb37b30cc2f5222053dc6e7e0a187819431 -Author: Kiran Chandramohan -Date: Fri Sep 1 10:44:35 2023 +0000 - - [Flang][OpenMP] Fix loop index privatisation in Parallel region in HLFIR - - HLFIR lowering always adds hlfir.declare when symbols are bound to their - address allocated on the stack. Ensure that the declare is placed along - with the alloca if it is hoisted. And always return the mlir value that - is bound to the symbol (i.e the alloca in FIR lowering and the declare - in HLFIR lowering). - - Context: Loop index variables in OpenMP parallel regions should be - privatised to work correctly. - - Reviewed By: tblah - - Differential Revision: https://reviews.llvm.org/D158594 - -commit 031b4e5e795a72e23c69da3d06ae7a958d217a8e -Author: Peter Klausler -Date: Mon Aug 21 12:21:49 2023 -0700 - - [flang] Support SELECT RANK on allocatables & pointers - - Unlike other executable constructs with associating selectors, the - selector of a SELECT RANK construct can have the ALLOCATABLE or POINTER - attribute, and will work as an allocatable or object pointer within - each rank case, so long as there is no RANK(*) case. - - Getting this right exposed a correctness risk with the popular - predicate IsAllocatableOrPointer() -- it will be true for procedure - pointers as well as object pointers, and in many contexts, a procedure - pointer should not be acceptable. So this patch adds the new predicate - IsAllocatableOrObjectPointer(), and updates some call sites of the original - function to use the new one. 
- - Differential Revision: https://reviews.llvm.org/D159043 - -commit a678ed41d24983bd1fb78b98fd790e3381979d44 -Author: Kazu Hirata -Date: Sun Aug 27 08:26:48 2023 -0700 - - [flang] Use DenseMap::lookup (NFC) - -commit 8b834caa62a279a0b4136bf3c8950b4f7162308e -Author: Kiran Chandramohan -Date: Wed Aug 23 11:37:00 2023 +0000 - - [Flang][OpenMP] Fix HLFIR lowering for commonblock threadprivate - - Commonblock names are not variables, but they can be marked as - threadprivate in OpenMP. This requires the commonblock name to - be bound to the address of the Commonblock. hlfir.declares are - not required for these, but we should be able to retrieve the - mlir Value corresponding to the Commonblock. This patch enables - this by special casing the Commonblocks like procedures. - - Reviewed By: tblah, vzakhari - - Differential Revision: https://reviews.llvm.org/D158070 - -commit a1c736ec08f25e83552b20c94a5b2afdcd021a40 -Author: Mark Danial -Date: Tue Aug 22 12:10:08 2023 -0400 - - [Flang] Cray pointer Lowering - - This patch is to add cray pointer (aka integer pointer) support to flang. Syntax and semantic checking were already available in flang. - Cray pointers reference (https://gcc.gnu.org/onlinedocs/gfortran/Cray-pointers.html) - - In order to implement the feature we create the following sequence for a simple scalar load and store: - - ``` - integer pte, i - pointer(ptr, pte) - i = pte - ``` - - ``` - %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"} - %2 = fir.alloca i32 {bindc_name = "pte", uniq_name = "_QFEpte"} - %3 = fir.alloca i64 {bindc_name = "ptr", uniq_name = "_QFEptr"} - ... 
- %7 = fir.embox %3 : (!fir.ref) -> !fir.box - %8 = fir.box_addr %7 : (!fir.box) -> !fir.ref - %9 = fir.convert %8 : (!fir.ref) -> !fir.ref> - %10 = fir.load %9 : !fir.ref> - %11 = fir.load %10 : !fir.ptr - fir.store %11 to %1 : !fir.ref - ``` - - ``` - integer pte, i - pointer(ptr, pte) - pte = i - ``` - - ``` - %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"} - %2 = fir.alloca i32 {bindc_name = "pte", uniq_name = "_QFEpte"} - %3 = fir.alloca i64 {bindc_name = "ptr", uniq_name = "_QFEptr"} - - %7 = fir.load %1 : !fir.ref - %8 = fir.embox %3 : (!fir.ref) -> !fir.box - %9 = fir.box_addr %8 : (!fir.box) -> !fir.ref - %10 = fir.convert %9 : (!fir.ref) -> !fir.ref> - %11 = fir.load %10 : !fir.ref> - fir.store %7 to %11 : !fir.ptr - ``` - The sequence is very similar for array element cases with the addition of fir.coordinate_of for the specific element. - The whole array case is slightly different but uses the same sequence before the fir.array_load and fir.array_merge_store. - - Reviewed By: kkwli0 - - Differential Revision: https://reviews.llvm.org/D151478 - -commit 4d04baeca5d8ce0f098e4d19317c26c30e773747 -Author: Valentin Clement -Date: Mon Aug 21 12:38:18 2023 -0700 - - [flang][openacc] Lower acc declare to the new acc.declare function - - Lower the acc delcare directive in function/subroutine - to the newly introduced acc.declare operation. Only a single - acc.declare operation is procduced in a function or subroutine - so they don't end up nested. - - Depends on D158314 - - Reviewed By: razvanlupusoru - - Differential Revision: https://reviews.llvm.org/D158315 - -commit 69a6bd5f052f076c72ef6f2a39a13f725acdb13a -Author: Valentin Clement -Date: Thu Aug 17 14:25:05 2023 -0700 - - [flang][openacc] Lower acc routine with function name - - The routine directive can appear in the specification part of - a subroutine, function or module and therefore appear before the - function or subroutine is lowered. 
We keep track of the created - routine info attribute and attach them to the function at the end - of the lowering if the directive appeared before the function was - lowered. - - Reviewed By: razvanlupusoru - - Differential Revision: https://reviews.llvm.org/D158204 - -commit 335b3990ef9115e3b20eb9dfa32393a7fdfde4e3 -Author: V Donaldson -Date: Mon Aug 7 13:29:17 2023 -0700 - - [flang] Do concurrent locality specifiers - -commit 14741ef88f2a00f4b5f92b981e1aec04bfa08d36 -Author: Valentin Clement -Date: Tue Aug 1 14:10:16 2023 -0700 - - [flang][openacc] Lower the exit part for OpenACC declare in function/subroutine - - This patch adds lowering for the exit part of the OpenACC declare construct - in function/subroutine. - - Depends on D156560 - - Reviewed By: razvanlupusoru - - Differential Revision: https://reviews.llvm.org/D156568 - -commit f752265231c2d15590a53e45bcc850acf2450dfc -Author: Nimish Mishra -Date: Mon Jul 31 16:34:30 2023 +0530 - - [flang][OpenMP] Support for privatization in common block - - This patch provides support for usage of common block - in private/firstprivate and lastprivate clauses. - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D156120 - -commit b4c54b20270ea1f0eac574785156c668930da5c5 -Author: Peixin Qiao -Date: Mon Jul 31 15:59:20 2023 +0530 - - [flang][OpenMP] Support common block in OpenMP private clause - - This supports the common block in OpenMP privat clause by making - each common block member host-associated privatization and - adds the test case. - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D127215 - -commit c217ff8794c2a710ef772ace1119ee773a182e9a -Author: Valentin Clement -Date: Wed Jul 26 09:55:57 2023 -0700 - - [flang][openacc] Add basic lowering for OpenACC declare construct in module - - This patch adds the skeleton and the basic lowering for OpenACC declare - construct when located in the module declaration. 
This patch just lower the - create clause with or without modifier. Other clause and global descrutor - lowering will come in follow up patches to keep this one small enough for - review. - - Reviewed By: razvanlupusoru - - Differential Revision: https://reviews.llvm.org/D156266 - -commit e909a2c1ca4d1f37a28293e6607136888ed889db -Author: Andrew Gozillon -Date: Thu Jul 13 11:41:06 2023 -0500 - - [Flang][OpenMP][Lower] Program level implicit SAVE variable handling for declare target - - This is an attempt at mimicing the method in which - threadprivate handles the following type of variables: - - program main - integer :: i - !$omp declare target to(i) - end - - Which essentially generates a GlobalOp for the variable (which - would normally only be an alloca) when it's instantiated. The - main difference is there is no operation generated within the - function, instead the declare target attribute is appended - later within handleDeclareTarget. - - Reviewers: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D152037 - -commit 45a9604417dbcb73d1b2fb3d7f8824a97d4b00c1 -Author: Jan Sjodin -Date: Mon Jul 10 10:55:47 2023 -0400 - - [Flang][OpenMP][MLIR] Add early outlining pass for omp.target operations to flang - - This patch implements an early outlining transform of omp.target operations in - flang. The pass is needed because optimizations may cross target op region - boundaries, but with the outlining the resulting functions only contain a - single omp.target op plus a func.return, so there should not be any opportunity - to optimize across region boundaries. - - The patch also adds an interface to be able to store and retrieve the parent - function name of the original target operation. This is needed to be able to - create correct kernel function names when lowering to LLVM-IR. 
- - Reviewed By: kiranchandramohan, domada - - Differential Revision: https://reviews.llvm.org/D154879 - -commit bc4586da6ef349b2777f28c0cd9b8b0f8faba125 -Author: Dmitriy Smirnov -Date: Mon Jul 3 16:31:20 2023 +0000 - - [Flang][OpenMP] Lower allocatable or pointer in private clause - - This patch lowers allocatables and pointers named in "private" OpenMP clause. - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D148570 - -commit 9bf50936237ded3fc324f4686dc4c1c5a9fb16eb -Author: Ethan Luis McDonough -Date: Fri Jun 30 15:32:21 2023 -0500 - - [flang][openmp] Parallel reduction FIR lowering - - This patch extends the logic for lowering loop construct reductions to parallel block reductions. - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D154182 - -commit 09ea692d166af42cda43bd24d42a6c67a12cce5a -Author: V Donaldson -Date: Thu Jun 29 11:32:56 2023 -0700 - - [flang] IEEE_ARITHMETIC intrinsic module procedures - - Implement - - - IEEE_CLASS - - IEEE_COPY_SIGN - - IEEE_GET_ROUNDING_MODE - - IEEE_IS_FINITE - - IEEE_IS_NAN - - IEEE_IS_NEGATIVE - - IEEE_IS_NORMAL - - IEEE_SET_ROUNDING_MODE - - IEEE_SIGNBIT - - IEEE_SUPPORT_ROUNDING - - IEEE_UNORDERED - - IEEE_VALUE - - for all REAL kinds (2, 3, 4, 8, 10, 16) where applicable. - -commit 7b4aa95d7c5e313ffb8028f627fe0480d66ef650 -Author: Slava Zakharin -Date: Thu Jun 29 10:39:52 2023 -0700 - - [flang][hlfir] Set/propagate 'unordered' attribute for elementals. - - This patch adds 'unordered' attribute handling the HLFIR elementals' - builders and fixes the attribute handling in lowering and transformations. 
- - Depends on D154031, D154032 - - Reviewed By: jeanPerier, tblah - - Differential Revision: https://reviews.llvm.org/D154035 - -commit e12ffe6a93505e590158ddd8cc73a4f201bbf0aa -Author: Peter Klausler -Date: Fri Jun 23 11:01:33 2023 -0700 - - [flang] Honor #line and related preprocessing directives - - Extend the SourceFile class to take account of #line directives - when computing source file positions for error messages. - Adjust the output of #line directives to -E output so that they - reflect any #line directives that were in the input. - - Differential Revision: https://reviews.llvm.org/D153910 - -commit 23fbe525ce0645341610b751184882fea264c99e -Author: Jean Perier -Date: Wed Jun 28 08:27:16 2023 +0200 - - [flang] do not merge block after lowering - - Lowering relies on dead code generation / unreachable block deletion - to delete some code that is potentially invalid. - - However, calling mlir::simplifyRegion also merges block, which may - promote SSA values to block arguments. Not all FIR types are intended - to be block arguments. - The added test shows an example where block merging led to - fir.shape<> being block arguments (and a failure later in codegen). - - Reviewed By: tblah, clementval, vdonaldson - - Differential Revision: https://reviews.llvm.org/D153858 - -commit 67169233322397e01cfdcdbf8131d77d38a41be0 -Author: Jean Perier -Date: Mon Jun 26 13:06:43 2023 +0200 - - [flang][hlfir] Lower user defined assignment - - Lower user defined assignment inside the hlfir.region_assign - "userDefinedAssignment" mlir region. - - This is done by adding an entry point to ConvertCall.h in order - to call genUserCall with the region block arguments as arguments. - - The codegen for hlfir.region_assign with user defined assignment - will be added in a later patch. 
- - Differential Revision: https://reviews.llvm.org/D153404 - -commit 569716fc5c2c232adcd5ff840637be596c1de9b9 -Author: Tom Eccles -Date: Wed Jun 14 13:23:00 2023 +0000 - - [flang][hlfir] Fix multiple return declaration type - - When the ENTRY statement is used, the same source can return different - types depending on the entry point. These different return values are - storage associated (share the same storage). Previously, this led to the - declaration of the results to all have the largest type. This patch adds - a convert between the stack allocation and the declaration so that the - hlfir.decl gets the right type. - - I haven't managed to generate code where this convert converted a - reference to an allocation for a smaller type into an allocation for a - larger one, but I have added an assert just in case. - - This is a different solution to https://reviews.llvm.org/D152725, see - discussion there. - - Differential Revision: https://reviews.llvm.org/D152931 - -commit 6e3a8720474528f8f752d0afbc6b8b9efab96325 -Author: Dhruv Chawla <44582521+dc03@users.noreply.github.com> -Date: Wed Jun 7 12:46:52 2023 +0530 - - [SetVector] Improve performance for small sizes - - SmallSetVector has an inefficiency where it does set insertions - regardless of the number of elements present within it. This contrasts - with other "Small-" containers where they use linear scan up to a - certain size "N", after which they switch to another strategy. - - This patch implements this functionality in SetVector, adding a template - parameter "N" which specifies the number of elements upto which the - SetVector follows the "small" strategy. Due to the use of "if - constexpr", there is no "small" code emitted when N is 0 which makes - this a zero overhead change for users using the default behaviour. - - This change also allows having SmallSetVector use DenseSet instead of - SmallDenseSet by default, which helps a little with performance. 
- - The reason for implementing this functionality in SetVector instead of - SmallSetVector is that it allows reusing all the code that is already - there and it is just augmented with the "isSmall" checks. - - This change gives a good speedup (0.4%): - https://llvm-compile-time-tracker.com/compare.php?from=086601eac266ec253bf313c746390ff3e5656132&to=acd0a72a4d3ee840f7b455d1b35d82b11ffdb3c0&stat=instructions%3Au - - Differential Revision: https://reviews.llvm.org/D152497 - -commit ca81808cc31074198663eb4beb904f490598d14f -Author: Kiran Chandramohan -Date: Mon Jun 5 15:43:37 2023 +0000 - - [Flang][OpenMP] Refactor to properly fix privatisation of loop bounds - - The OpenMP loop Operations have the bounds attached to them. If the - loop bounds are privatised then the privatisation has to happen - before the loop operation is created. To do this the privatisation - is split into two steps. The first step performs cloning and - firstprivate handling, the second step performs lastprivate handling. - - This also reverts the changes in the temporary fix (D127137). - - Fixes https://github.com/flang-compiler/f18-llvm-project/issues/1171#issuecomment-1143880545 - Fixes https://github.com/flang-compiler/f18-llvm-project/issues/1171#issuecomment-1119997442 - - Fixes #60872 - - Reviewed By: NimishMishra - - Differential Revision: https://reviews.llvm.org/D151504 - -commit 4ad7279392653c0bcf564799ffb3f7e20ed4ef00 -Author: Peter Klausler -Date: Sat May 6 15:03:39 2023 -0700 - - [flang] CUDA Fortran - part 1/5: parsing - - Begin upstreaming of CUDA Fortran support in LLVM Flang. 
- - This first patch implements parsing for CUDA Fortran syntax, - including: - - a new LanguageFeature enum value for CUDA Fortran - - driver change to enable that feature for *.cuf and *.CUF source files - - parse tree representation of CUDA Fortran syntax - - dumping and unparsing of the parse tree - - the actual parsers for CUDA Fortran syntax - - prescanning support for !@CUF and !$CUF - - basic sanity testing via unparsing and parse tree dumps - - ... along with any minimized changes elsewhere to make these - work, mostly no-op cases in common::visitors instances in - semantics and lowering to allow them to compile in the face - of new types in variant<> instances in the parse tree. - - Because CUDA Fortran allows the kernel launch chevron syntax - ("call foo<<>>()") only on CALL statements and - not on function references, the parse tree nodes for CallStmt, - FunctionReference, and their shared Call were rearranged a bit; - this caused a fair amount of one-line changes in many files. - - More patches will follow that implement CUDA Fortran in the symbol - table and name resolution, and then semantic checking. - - Differential Revision: https://reviews.llvm.org/D150159 - -commit 9ceb0a7bc0e73c4aab6cfade225f3ab33c949b83 -Author: Carlos Eduardo Seo -Date: Sat May 20 05:16:50 2023 +0000 - - Fix nested block constructs for SELECT CASE - - In some scenarios, a SELECT CASE could cause an error while lowering to FIR. - This was caused by a spurious extra branch added after the end statement. - - Fixes #62726 - - Differential Revision: https://reviews.llvm.org/D151118 - -commit ef934174704b75c8e04830bfd4f0c0bbedde9621 -Author: Kelvin Li -Date: Tue May 23 19:02:49 2023 -0400 - - [flang] Support for PowerPC vector type - - The following PowerPC vector type syntax is added: - - VECTOR ( element-type-spec ) - - where element-type-sec is integer-type-spec, real-type-sec or unsigned-type-spec. - - Two opaque types (__VECTOR_PAIR and __VECTOR_QUAD) are also added. 
- - A finite set of functionalities are implemented in order to support the new types: - 1. declare objects - 2. declare function result - 3. declare type dummy arguments - 4. intrinsic assignment between the new type objects (e.g. v1=v2) - 5. reference functions that return the new types - - Submit on behalf of @tislam @danielcchen - - Authors: @tislam @danielcchen - - Differential Revision: https://reviews.llvm.org/D150876 - -commit 6f7a3b078191a925546ea3fead2e9cf0efdd9257 -Author: V Donaldson -Date: Tue May 16 13:34:57 2023 -0700 - - [flang] Non-type-bound defined IO lowering - - Generate supporting data structures and calls to new runtime IO functions - for defined IO that accesses non-type-bound procedures, such as `wft` in: - - module m1 - type t - integer n - end type - interface write(formatted) - module procedure wft - end interface - contains - subroutine wft(dtv, unit, iotype, v_list, iostat, iomsg) - class(t), intent(in) :: dtv - integer, intent(in) :: unit - character(*), intent(in) :: iotype - integer, intent(in) :: v_list(:) - integer, intent(out) :: iostat - character(*), intent(inout) :: iomsg - iostat = 0 - write(unit,*,iostat=iostat,iomsg=iomsg) 'wft was called: ', dtv%n - end subroutine - end module - - module m2 - contains - subroutine test1 - use m1 - print *, 'test1, should call wft: ', t(1) - end subroutine - subroutine test2 - use m1, only: t - print *, 'test2, should not call wft: ', t(2) - end subroutine - end module - - use m1 - use m2 - call test1 - call test2 - print *, 'main, should call wft: ', t(3) - end - -commit 4eab303404d6bb2252b4baf807c5ac87a0fa3125 -Author: Slava Zakharin -Date: Tue May 16 20:05:22 2023 -0700 - - [flang][hlfir] Fixed symbol lookup for character returns. - - Symbols corresponding to entries returning character results - must be mapped to EmboxCharOp, first, before we can map them - to DeclareOp. 
The code may be reworked after HLFIR is enabled - by default, but right now it seems like an acceptable solution to me. - - Differential Revision: https://reviews.llvm.org/D150749 - -commit 7f7bbc73175d94f63cba905191a4ecc341b9fdba -Author: Peter Klausler -Date: Tue May 16 12:33:29 2023 -0700 - - [flang] Correct overriding (or not) of inaccessible bindings - - Fortran doesn't allow inaccessible procedure bindings to be - overridden, and this needs to apply to generic resolution. - When resolving a type-bound generic procedure from another - module, ensure only that the most extended override from its - module is used if it is PRIVATE, not a later apparent override - from another module. - - Differential Revision: https://reviews.llvm.org/D150721 - -commit be5747e516937df6436c9abb8059b6e471c02226 -Author: Slava Zakharin -Date: Tue May 9 19:50:48 2023 -0700 - - [flang] Fixed global name creation for literal constants. - - The global names were created using a hash based on the address - of std::vector::data address. Since the memory may be reused - by different std::vector's, this may cause non-equivalent - constant expressions to map to the same name. This is what is happening - in the modified flang/test/Lower/constant-literal-mangling.f90 test. - - I changed the name creation to use a map between the constant expressions - and corresponding unique names. The uniquing is done using a name counter - in FirConverter. The effect of this change is that the equivalent - constant expressions are now mapped to the same global, and the naming - is "stable" (i.e. it does not change from compilation to compilation). - - Though, the issue is not HLFIR specific it was affecting several tests - when using HLFIR lowering. 
- - Differential Revision: https://reviews.llvm.org/D150380 - -commit c7ff45a529ca16c8a3dbff3b6786e41b49c195cc -Author: Jean Perier -Date: Tue May 9 09:22:24 2023 +0200 - - [flang][hlfir] Lower left-hand side vector subscripts to HLFIR - - This patch lowers assignments to vector subscripted designators into the - newly added hlfir.elemental_addr and hlfir.region_assign. - - Note that the codegen of these operation to FIR is still TODO and will - still emit a TODO message when trying to compile programs end to end. - - Differential Revision: https://reviews.llvm.org/D149962 - -commit 54c88fc9dfa5854a5891cf3d68d3d2c4a4ba0f25 -Author: Jean Perier -Date: Tue May 9 09:21:09 2023 +0200 - - [flang][hlfir] Lower WHERE to HLFIR - - Lower WHERE to the newly added hlfir.where and hlfir.elsewhere - operations. - - Differential Revision: https://reviews.llvm.org/D149950 - -commit b87e65531c58df55cfae4c06c7a68f84539aa779 -Author: Jean Perier -Date: Tue May 9 09:18:53 2023 +0200 - - [flang][hlfir] Lower forall to HLFIR - - Lower Forall to the previously added hlfir.forall, hlfir.forall_mask. - hlfir.forall_index, and hlfir.region_assign operations. - - The HLFIR assignment code lowering is moved into genDataAssignment for - more readability and so that user defined assignment (still a TODO), - will be able to share most of the logic. - - Differential Revision: https://reviews.llvm.org/D149878 - -commit 42df495114dc75fac4f75babe8f1ed43b15515fc -Author: Ethan Luis McDonough -Date: Fri May 5 15:50:18 2023 -0500 - - [flang] OpenMP allocate directive parse tree fix - - Addresses the same issue as the following abandoned revision: D104391. 
- - Rewrite leading declarative allocations so they are nested within their respective executable allocate directive - - Original: - ExecutionPartConstruct -> OpenMPDeclarativeAllocate - ExecutionPartConstruct -> OpenMPDeclarativeAllocate - ExecutionPartConstruct -> OpenMPExecutableAllocate - - After rewriting: - ExecutionPartConstruct -> OpenMPExecutableAllocate - | ExecutionPartConstruct -> OpenMPDeclarativeAllocate - | ExecutionPartConstruct -> OpenMPDeclarativeAllocate - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D148409 - - Co-authored-by: Isaac Perry - -commit a6e616cdb1bb89f19a9df5e72b1e4256fed19968 -Author: Ethan Luis McDonough -Date: Fri May 5 15:47:00 2023 -0500 - - Revert "[flang] OpenMP allocate directive parse tree fix" - - This reverts commit 597d8563cd66f23d857196bf135a0c513115ece2. - -commit 597d8563cd66f23d857196bf135a0c513115ece2 -Author: Ethan Luis McDonough -Date: Fri May 5 14:53:08 2023 -0500 - - [flang] OpenMP allocate directive parse tree fix - - Addresses the same issue as the following abandoned revision: D104391. - - Rewrite leading declarative allocations so they are nested within their respective executable allocate directive - - Original: - ``` - ExecutionPartConstruct -> OpenMPDeclarativeAllocate - ExecutionPartConstruct -> OpenMPDeclarativeAllocate - ExecutionPartConstruct -> OpenMPExecutableAllocate - ``` - - After rewriting: - ``` - ExecutionPartConstruct -> OpenMPExecutableAllocate - | ExecutionPartConstruct -> OpenMPDeclarativeAllocate - | ExecutionPartConstruct -> OpenMPDeclarativeAllocate - ``` - - Co-authored-by: Isaac Perry - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D148409 - -commit 936d40cdb5c82ca74358b260dc69f7150209a81a -Author: Ethan Luis McDonough -Date: Fri May 5 14:50:14 2023 -0500 - - Revert "[flang] OpenMP allocate directive parse tree fix" - - This reverts commit 5faf45a3d24e603cbc8fe4eb45da386653dae5e5. 
- Once again arcanist stripped the co-author metadata. I'm going to add it to the revision description and try one last time. - -commit 5faf45a3d24e603cbc8fe4eb45da386653dae5e5 -Author: Ethan Luis McDonough -Date: Fri May 5 14:32:45 2023 -0500 - - [flang] OpenMP allocate directive parse tree fix - - Addresses the same issue as the following abandoned revision: D104391. - - Rewrite leading declarative allocations so they are nested within their respective executable allocate directive - - Original: - ``` - ExecutionPartConstruct -> OpenMPDeclarativeAllocate - ExecutionPartConstruct -> OpenMPDeclarativeAllocate - ExecutionPartConstruct -> OpenMPExecutableAllocate - ``` - - After rewriting: - ``` - ExecutionPartConstruct -> OpenMPExecutableAllocate - | ExecutionPartConstruct -> OpenMPDeclarativeAllocate - | ExecutionPartConstruct -> OpenMPDeclarativeAllocate - ``` - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D148409 - -commit c89959842fe5c1631db60db5dd5994a42810d7e8 -Author: Ethan Luis McDonough -Date: Fri May 5 14:31:01 2023 -0500 - - Revert "[flang] OpenMP allocate directive parse tree fix" - - This reverts commit eaf7d97865140a17f13ad77e5dc0216438127094. - Arcanist stripped co-author data from initial commit. - -commit eaf7d97865140a17f13ad77e5dc0216438127094 -Author: Ethan Luis McDonough -Date: Fri May 5 13:49:45 2023 -0500 - - [flang] OpenMP allocate directive parse tree fix - - Addresses the same issue as the following abandoned revision: D104391. 
- - Rewrite leading declarative allocations so they are nested within their respective executable allocate directive - - Original: - ``` - ExecutionPartConstruct -> OpenMPDeclarativeAllocate - ExecutionPartConstruct -> OpenMPDeclarativeAllocate - ExecutionPartConstruct -> OpenMPExecutableAllocate - ``` - - After rewriting: - ``` - ExecutionPartConstruct -> OpenMPExecutableAllocate - | ExecutionPartConstruct -> OpenMPDeclarativeAllocate - | ExecutionPartConstruct -> OpenMPDeclarativeAllocate - ``` - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D148409 - -commit ec2c0e0f55637209d1901c73f162dd8625034a56 -Author: Slava Zakharin -Date: Thu May 4 08:47:28 2023 -0700 - - [flang][hlfir] Generate explicit HLFIR type cast for implicit logical<->integer conversion. - - hlfir.assign, in general, ends up calling the Assign runtime that asserts - that the types of LHS and RHS match. In case of implicit logical<->integer - conversions (allowed as an extension) the operands of hlfir.assign - have non-matching types. This change makes sure that the lowering - produces explicit type cast (either as a scalar fir.convert or - as a hlfir.elemental producing array expression). - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D149765 - -commit 583d492c630655dc0cd57ad167dec03e6c5d211c -Author: Jean Perier -Date: Wed May 3 09:19:12 2023 +0200 - - [flang][hlfir] Lower vector subscripted RHS designators - - Lower vector subscripted designators as values when they appear outside - of the assignment left-hand side and input IO contexts. - - This matches Fortran semantics where vector subscripted designators cannot - be written to outside of the two contexts mentioned above: they are - passed/taken by value where they appear. - - This patch uses the added hlfir.element_addr to lower vector designators - in lowering. 
But when reaching the end of the designator lowering, the - hlfir.element_addr is turned into an hlfir.elemental when lowering is - not asking for the hlfir.elemental_addr. - - This approach allows lowering vector subscripted in the same way in - while visiting the designator, and only adapt to the context at the - edge. - - The part where lowering uses the hlfir.elemental_addr will be - done in further patch as it requires lowering assignments in the - new hlfir.region_assign op, and there is not codegen yet for these - new operations. - - Differential Revision: https://reviews.llvm.org/D149480 - -commit af78197857115716802189ef073f83cdac9ede15 -Author: V Donaldson -Date: Wed Apr 12 15:37:19 2023 -0700 - - [flang] Remove `ignoring all compiler directives` warning - - The explicit `ignoring all compiler directives` reminder warning is no - longer accurate. Any similar, more accurate message is best generated - by the front end (change pending). - -commit fd922e6ab0b5324cdf36e2646132d802d3a04ce0 -Author: V Donaldson -Date: Wed Apr 5 11:13:36 2023 -0700 - - [flang] Nonconformant assigned gotos - - Modify code generation for assigned gotos to generate a runtime error - for most cases that violate F90 Clause 8.2.4, rather than treating a - nonconformant GOTO as a nop. For example, generate a runtime error for - a GOTO that attempts to branch to a label for a FORMAT statement. - Relax the requirement that an assigned GOTO with a label list must - branch to a label in the list, and instead allow a branch to any valid - assigned GOTO target in scope. - -commit 04a920b76acf0a52a3eb957c6331ba81a1173e2a -Author: Jean Perier -Date: Mon Apr 3 09:18:41 2023 +0200 - - [flang] preserve pointer rank in polymorphic_pointer => NULL() - - The current lowering for polymorphic pointer association was not - dealing with NULL in a "context aware" fashion: it was calling the - `PointerAssociate` runtime entry point with a fir.box target. 
- But the fir.box is a descriptor for a scalar; this led the - runtime to set the pointer rank to zero, regardless of its actual - rank.
- - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D146046 - -commit b07ef9e7cd6f5348df0a4f63e70a60491427ff64 -Author: Renaud-K -Date: Wed Mar 8 18:39:40 2023 -0800 - - Break circular dependency between FIR dialect and utilities - -commit 4f3c98542bebac90546a03363e5956f9862ae985 -Author: Valentin Clement -Date: Mon Mar 6 09:35:36 2023 +0100 - - [flang] Use AssignPolymorphic when LHS is polymorphic - - Make use of the new runtime entry point for assignment to - LHS allocatable polymorphic. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D145324 - -commit 2c1433453d1670f668220670b8f2df60f9dc9949 -Author: V Donaldson -Date: Mon Feb 27 14:05:53 2023 -0800 - - [flang] Block construct - - A block construct is an execution control construct that supports - declaration scopes contained within a parent subprogram scope or another - block scope. (blocks may be nested.) This is implemented by applying - basic scope processing to the block level. - - Name uniquing/mangling is extended to support this. The term "block" is - heavily overloaded in Fortran standards. Prior name uniquing used tag `B` - for common block objects. Existing tag choices were modified to free up `B` - for block construct entities, and `C` for common blocks, and resolve - additional issues with other tags. The "old tag -> new tag" changes can - be summarized as: - - -> B -- block construct -> new - B -> C -- common block - C -> YI -- intrinsic type descriptor; not currently generated - CT -> Y -- nonintrinsic type descriptor; not currently generated - G -> N -- namelist group - L -> -- block data; not needed -> deleted - - Existing name uniquing components consist of a tag followed by a name - from user source code, such as a module, subprogram, or variable name. - Block constructs are different in that they may be anonymous. 
(Like other - constructs, a block may have a `block-construct-name` that can be used - in exit statements, but this name is optional.) So blocks are given a - numeric compiler-generated preorder index starting with `B1`, `B2`, - and so on, on a per-procedure basis. - - Name uniquing is also modified to include component names for all - containing procedures rather than for just the immediate host. This - fixes an existing name clash bug with same-named entities in same-named - host subprograms contained in different-named containing subprograms, - and variations of the bug involving modules and submodules. - - F18 clause 9.7.3.1 (Deallocation of allocatable variables) paragraph 1 - has a requirement that an allocated, unsaved allocatable local variable - must be deallocated on procedure exit. The following paragraph 2 states: - - When a BLOCK construct terminates, any unsaved allocated allocatable - local variable of the construct is deallocated. - - Similarly, F18 clause 7.5.6.3 (When finalization occurs) paragraph 3 - has a requirement that a nonpointer, nonallocatable object must be - finalized on procedure exit. The following paragraph 4 states: - - A nonpointer nonallocatable local variable of a BLOCK construct - is finalized immediately before it would become undefined due to - termination of the BLOCK construct. - - These deallocation and finalization requirements, along with stack - restoration requirements, require knowledge of block exits. In addition - to normal block termination at an end-block-stmt, a block may be - terminated by executing a branching statement that targets a statement - outside of the block. 
This includes - - Single-target branch statements: - - goto - - exit - - cycle - - return - - Bounded multiple-target branch statements: - - arithmetic goto - - IO statement with END, EOR, or ERR specifiers - - Unbounded multiple-target branch statements: - - call with alternate return specs - - computed goto - - assigned goto - - Lowering code is extended to determine if one of these branches exits - one or more relevant blocks or other constructs, and adds a mechanism to - insert any necessary deallocation, finalization, or stack restoration - code at the source of the branch. For a single-target branch it suffices - to generate the exit code just prior to taking the indicated branch. - Each target of a multiple-target branch must be analyzed individually. - Where necessary, the code must first branch to an intermediate basic - block that contains exit code, followed by a branch to the original target - statement. - - This patch implements an `activeConstructStack` construct exit mechanism - that queries a new `activeConstruct` PFT bit to insert stack restoration - code at block exits. It ties in to existing code in ConvertVariable.cpp - routine `instantiateLocal` which has code for finalization, making block - exit finalization on par with subprogram exit finalization. Deallocation - is as yet unimplemented for subprograms or blocks. This may result in - memory leaks for affected objects at either the subprogram or block level. - Deallocation cases can be addressed uniformly for both scopes in a future - patch, presumably with code insertion in routine `instantiateLocal`. - - The exit code mechanism is not limited to block construct exits. It is - also available for use with other constructs. In particular, it is used - to replace custom deallocation code for a select case construct character - selector expression where applicable. This functionality is also added - to select type and associate constructs. 
It is available for use with - other constructs, such as select rank and image control constructs, - if that turns out to be necessary. - - Overlapping nonfunctional changes include eliminating "FIR" from some - routine names and eliminating obsolete spaces in comments. - -commit e5921ef021efa7e696421069d294d66e58df2541 -Author: Jean Perier -Date: Mon Feb 27 09:05:11 2023 +0100 - - [flang][hlfir] Lower associate construct to HLFIR - - - always use genExprAddr when lowering to HLFIR: it does not create - temporary for array sections without vector subscripts, so there is - no need to have custom logic. - - - update mangling to deal with AssocDetailsEntity. Their name is - required in HLFIR so that it can be added to the hlfir.declare - that is created for the selector once it is lowered. This should - allow getting debug info for selector when debug info are generated - from hlfir.declare. - - The rest of associate construct lowering is unchanged and shared with - the current lowering. - - This patch also enables select type lowering to work properly, but some - other todos (mainly about parent component references) prevents porting - the tests for now, so this will be done later. - - Differential Revision: https://reviews.llvm.org/D144740 - -commit 713b3ad43850ad9fc89b89f53ab37b54f744ec70 -Author: Jean Perier -Date: Mon Feb 27 09:04:20 2023 +0100 - - [flang][hlfir] Lower allocatable assignment to HLFIR - - Nothing much to do except set the right attributes on hlfir.assign. - - Differential Revision: https://reviews.llvm.org/D144727 - -commit 18983df02fdfb218a6e46620912ef1d0ebb428e9 -Author: Peter Steinfeld -Date: Wed Feb 22 10:51:54 2023 -0800 - - [Flang] Don't crash when BOZ literals are on the rhs of an assignment - - For BOZ literals, the rhsType will be empty. Check for that before - trying to access its value. 
- - Differential Revision: https://reviews.llvm.org/D144576 - -commit 33c29a82a9b3b8a0354a5b5bd75b462505602107 -Author: Valentin Clement -Date: Tue Feb 21 10:14:00 2023 +0100 - - [flang] Use runtime Assign when rhs is polymorphic - - Use the runtime when there lhs or rhs is polymorphic. The runtime - allows to deal better with polymorphic entities and aliasing. - - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D144418 - -commit f5cca3c5ce7a1a6d9934e22c60e47ccd1834cf99 -Author: Valentin Clement -Date: Thu Feb 16 20:59:54 2023 +0100 - - [flang] Handle expression in SELECT TYPE selector - - Expression in selector were raising an error. In some - cases expression can be found in selector. This patch - updates the code to accept expression and adds a lowering - test. - - Reviewed By: PeteSteinfeld, vdonaldson - - Differential Revision: https://reviews.llvm.org/D144185 - -commit b0de87268a60e9e755b34b2fb505589e01aab14c -Author: Valentin Clement -Date: Thu Feb 16 09:05:12 2023 +0100 - - [flang] Retrieve the correct scope when lowering SELECT TYPE - - Scope to retrieve the associating entity is needed to map the - symbol to the IR value. The scope can be found with a source - information. For the type case in SELECT TYPE construct, the source - information is on the Statement. This patch updates - the lowering so the scopes for each type guards is retrieved - before the processing. - - Reviewed By: PeteSteinfeld, vdonaldson - - Differential Revision: https://reviews.llvm.org/D144133 - -commit cedfd2721e3492e5ab0ea86d24d8027846687c27 -Author: Jean Perier -Date: Thu Feb 9 09:02:43 2023 +0100 - - [flang][hlfir] Lower procedure designators to HLFIR - - - Add a convertProcedureDesignatorToHLFIR that converts the - fir::ExtendedValue from the current lowering to a - fir.boxproc/tuple mlir::Value. 
- - - Allow fir.boxproc/tuple as hlfir::Entity values - (a function is an address, but from a Fortran entity point of view, - procedure that are not procedure pointers cannot be assigned to, so - it makes a lot more sense to consider those as values). - - - Modify symbol association to not generate an hlfir.declare for dummy - procedures. They are not needed and allowing hlfir.declare to declare - function values would make its verifier and handling overly complex - for little benefits (maybe an hlfir.declare_proc could be added if it - turnout out useful later for debug info and attributes storing - purposes). - - - Allow translation from hlfir::Entity to fir::ExtendedValue. - convertToBox return type had to be relaxed because some intrinsics - handles both object and procedure arguments and need to lower their - object arguments "asBox". fir::BoxValue is not intended to carry - dummy procedures (all its member functions would make little sense - and its verifier does not accept such type). - Note that AsAddr, AsValue and AsBox will always return the same MLIR - value for procedure designators because they are always handled the - same way in FIR. - - Differential Revision: https://reviews.llvm.org/D143585 - -commit ab9c4e9fff272dd88c92a2d2f3a2e5c66e07e6e2 -Author: Jean Perier -Date: Tue Feb 7 09:22:47 2023 +0100 - - [flang][NFC] addSymbol/lookupSymbol clean-up - - HLFIR requires mapping symbol to a single mlir::Value (produced - by a fir::FortranVariableOpInterface), while the current lowering - maps the value to a fir::ExtdendedValue. - - So far, the HLFIR symbol query was a special one. Hence, all the code - directly using symMap.lookupSymbol and symMap.addSymbol did not work - with the lowering to HLFIR. 
- - Refactor the code so that symbol lookup and add symbol go through - the converter in a centralize place that handles the HLFIR case - (translate fir::FortranVariableOpInterface to fir::ExtdendedValue - in lookups, and generate hlfir.declare when adding symbols). - - In the refactoring, fir::FortranVariableOpInterface is added as - a symbolBox variant to avoid special casing all lookups (shallowLookup...). - - Remove some unused SymbolBox member function instead of updating - them. - - Differential Revision: https://reviews.llvm.org/D143395 - -commit dda01632db12d3b11d8e2e21d73d438626cb0436 -Author: Valentin Clement -Date: Tue Feb 7 09:15:54 2023 +0100 - - [flang] Use PointerAssociateLowerBounds when there is lower bounds - - The current code was not taking provided lower bounds when the pointer - is polymorphic and was just calling PointerAssociate. This patch - updates the behavior and use PointerAssociateLowerBounds with the provided - lower bounds. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D143392 - -commit 3b73fc320f918a12a77e617f162bc7f7111ddfaf -Author: Valentin Clement -Date: Mon Feb 6 21:06:44 2023 +0100 - - [flang] Fix creation of the bound array for pointer remapping - - The runtime function expects a 2 x newRank array and the code - was passing a newRank x 2 array. This patch updates the - creation of the array to fit the runtime expectation. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D143405 - -commit ed8e858a1478d18d321f104a86a579e03ba1886e -Author: Jean Perier -Date: Mon Feb 6 15:14:08 2023 +0100 - - [flang][hlfir] deref pointers before lowering assignment to hlfir.assign - - There is little point not to dereference pointers LHS and RHS before - before emitting an hlfir.assign when lowering an assignment. - This pushes complexity and descriptor read side effects that are better - expressed in a load before the assignment. 
- - Differential Revision: https://reviews.llvm.org/D143372 - -commit 7f0074a64a30c448fec2f36d08dffbe64134e84d -Author: Valentin Clement -Date: Fri Feb 3 12:21:59 2023 +0100 - - [flang] Avoid double finalization when intrinsic assignment is done in the runtime - - genRecordAssignment is emitting code to call Assign in the runtime for some cases. - In these cases, the finalization is done by the runtime so we do not need to do it in - a separate cal to avoid multiple finalization.. - Also refactor the code in Bridge so the actual finalization of allocatable - is done before any reallocation. We might need to push this into ReallocIfNeeded. - It is not clear if the allocatable lhs needs to be finalized in any cases or only if it is - reallocated. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D143186 - -commit 6ada493035efcd1e90e8e062595c478babe7cd18 -Author: Valentin Clement -Date: Fri Feb 3 10:16:54 2023 +0100 - - [flang] Fix potential null scope when lowering dispatch table op - - Similary to D140209, the scope might need to be retrieved - from the typeSymbol. The test code was crashing because the - scope passed to CollectBindings was initially null. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D143188 - -commit 591e3e6207894ebaee1e47a82fcfb3b246459f76 -Author: Valentin Clement -Date: Wed Feb 1 15:53:52 2023 +0100 - - [flang] Make EndProgramStmt a NOP + early return - - Fix done in D143055 can be simpler by making EndProgramStmt a NOP - and dealing with the exit in `endNewFunction` in a centralize way. - Also add finalization when there is an early exit in the main - program. 
- - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D143065 - -commit d65aeeb360e279dfc7c9ddf7c8555b585e9df0d4 -Author: Valentin Clement -Date: Wed Feb 1 14:45:53 2023 +0100 - - [flang] Make sure derived-type finalization is done before return - - Finalization needs to be done before the terminator. In case - of end program, this was done after it and trigger a verifier error. - This patch fixes this case. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D143055 - -commit 97492fd1aed56e3d041952914849d95b5ff999af -Author: Valentin Clement -Date: Tue Jan 31 13:46:12 2023 +0100 - - [flang] derived-type finalization - - This patch implements the derived-type finalization for - monomorphic and polymorphic derived-type. - - The finalization is done through a call to the `Destroy` - runtime function so the allocatable component object are also - finalized correctly when needed. It would be possible to finalize - monomorphic derived-type with non finalizable component with a - direct call to their finalize subroutine. - - 7.5.6.3 point 1: LHS nonallocatable object and LHS allocatable - object finalization. Done with call to `Destroy` for monomorphic - derived-type and through `Assign` for polymorphic entities. - - 7.5.6.3 point 2: Done within the deallocation calls. - - 7.5.6.3 point 3: A function context is added to the bridge to - attach finalization that need to happen on function/subroutine - exit. - - 7.5.6.3 point 4: BLOCK construct not yet implemented. - - 7.5.6.3 point 5/6: Finalization attach to the stmtCtx in a - similar way than 9.7.3.2 point 4. - - 7.5.6.3 point 7: INTENT(OUT) finalization done with a - call to `Destroy` runtime function call. - - This patch passes 9/10 tests in the proposed test-suite - https://github.com/llvm/llvm-test-suite/pull/13 - - - The case with BLOCK construct will be implemented later when - BLOCK are implemented upstream. - - - Automatic deallocation is not yet implemented. 
Finalization triggered - by automatic deallocation is then not triggered. - - Reviewed By: jeanPerier, PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D142707 - -commit 3af9dfe464446da8e9acea45681df28b18583370 -Author: Kiran Chandramohan -Date: Wed Jan 25 13:37:54 2023 +0000 - - [Flang][Debug] Use pathnames from location of functions - - This ensures that functions in included files have the correct path - in their file metadata. - - Note: This patch also sets all locations to have the full path names. - - Reviewed By: vzakhari, PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D142263 - -commit 7aa8a9f1abe2e0133febe015ac502737b06828f4 -Author: Valentin Clement -Date: Wed Jan 25 09:17:27 2023 +0100 - - [flang] Fix bounds array creation for pointer remapping calls - - `PointerAssociateRemapping` expect a descriptor holding - a newRank x 2 array of int64. The previous lowering was wrong. - Adapt the lowering to fit the expectation of the runtime. - Use the `bounds` to get the rank. - - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D142487 - -commit 7531c87183822cf8931496a757a09779e24aeac0 -Author: Jean Perier -Date: Fri Jan 20 14:05:42 2023 +0100 - - [flang][hlfir] Enable allocate, deallocate, pointer assignment lowering - - The previous patches allowed lowering allocatable/and pointer designator - expressions with HLFIR. - This patch updates the bridge genExprMutableBox to use HLFIR lowering - when HLFIR flag is set. For allocate and deallocate lowering that use - genExprMutableBox, no other change is needed. - - For pointer assignments, the code doing the pointer assignments in the - bridge can be reused and is simply moved so that it can be shared, and - the "explicit context" special cases of the previous lowering are - by-passed. 
- - The code doing pointer assignment revealed that convertExprToAddress - did not match the previous genExprAddr behavior (that actually - does not create temps for "x" where x is not contiguous). - Instead of trying to copy the old behavior that is a bit weird (was - dictated by the implementation rather than design). Update - convertExprToAddress to do something sensible and that works with - the current genExprAddr usages (if anything, it should saves bogus - array section temps). - - Differential Revision: https://reviews.llvm.org/D142197 - -commit eef0210706bc16ffde17d5b574799d8aae705790 -Author: Kiran Chandramohan -Date: Thu Jan 19 16:49:26 2023 +0000 - - [Flang][Debug] Modifications for getting pathname - - -> Use file pathname from the Flang frontend. It is the frontend - that is in-charge of finding the files and is hence the canonical - source for paths. - -> Convert pathname to absolute pathname while creating the moduleOp. - - Co-authored-by: Peter Klausler - - Reviewed By: PeteSteinfeld, vzakhari, jeanPerier, awarzynski - - Differential Revision: https://reviews.llvm.org/D141674 - -commit a459a2485b54fbd9e1f8a48061e79cdcd12b12a5 -Author: Valentin Clement -Date: Thu Jan 19 17:32:02 2023 +0100 - - [flang] Fix SELECT TYPE lowering when CLASS DEFAULT is not the last type guard - - CLASS DEFAULT needs to be the last attribute when fir.select_type op is created. - It needs to be at its actual position in the Fortran code when the TypeGuardStmt - are processed. The current lowering was crashing when CLASS DEFAULT was not at - the last position. - This patch fixes the issue by tracking the actual position of the CLASS DEFAULT - type guard and set it at the correct position after the fir.select_type op - is created. 
- - Reviewed By: jeanPerier, PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D142091 - -commit 91682b2631b224a9f6dca9512b5e0951cc4a7762 -Author: Kazu Hirata -Date: Sat Jan 14 14:06:18 2023 -0800 - - Remove redundant initialization of std::optional (NFC) - -commit 199e49746db83f1e56d5899f1905784bbfa142e3 -Author: Jean Perier -Date: Fri Jan 13 09:15:52 2023 +0100 - - [flang] Lower elemental intrinsics to hlfir.elemental - - - Move the core code generating hlfir.elemental for user calls from - genUserElementalCall into a new ElementalCallBuilder class and use - C++ CRTP (curiously recursive template pattern) to implement the - parts specific to user and intrinsic call into ElementalUserCallBuilder - and ElementalIntrinsicCallBuilder. This allows sharing the core logic - to lower elemental procedures for both user defined and intrinsics - procedures. - - - To allow using ElementalCallBuilder, split the intrinsic lowering code - into two parts: first lower the arguments to hlfir::Entity regardless - of the interface of the intrinsics, and then, in a different function - (genIntrinsicProcRefCore), prepare the hlfir::Entity according to the - interface. This allows using the same core logic to prepare "normal" - arguments for non-elemental intrinsics, and to prepare the elements of - array arguments inside elemental call (ElementalIntrinsicCallBuilder - calls genIntrinsicProcRefCore once it has computed the scalar actual - arguments). - To allow this split, genExprBox/genExprAddr/genExprValue logic had to - be split in ConvertExprToHlfir.[cpp/h]. - - - Add missing statement context pushScope/finalizeAndPop around the - code generation inside the hlfir.elemental so that any temps created - while lowering the call at the element level is correctly cleaned-up. - - - One piece of code in hlfir::Entity::hasNonDefaultLowerBounds() was wrong for assumed shape arrays (returned true when an assumed shaped array had no explicit lower bounds). 
This caused the added test to hit a bogus TODO, so fix it. - - Elemental intrinsics returning are still TODO (e.g., adjustl). I will implement this in a next patch, this one is big enough. - - Differential Revision: https://reviews.llvm.org/D141612 - -commit 87e547d8f035c8de321e246629dd2b1ccec31662 -Author: Kiran Chandramohan -Date: Thu Jan 12 10:34:34 2023 +0000 - - [Flang] Add/Restore basic debug support (1/n) - - Recent changes to MLIR meant that Flang does not generate any debug line - table information. - - This patch adds a pass that provides some foundation work with which - basic line table debug info can be generated. A walk is performed on - all the `func` ops in the module and they are decorated with a fusedLoc - op that contains the debug metadata for the subroutine along with - location information. - - Alternatives include populating this info during lowering or during FIR - to LLVM Dialect conversion. - - Note: Patches in future will add - -> more realistic debug info for types and other fields. - -> driver flags to control generation of debug. - - Fixes #58634. - - Reviewed By: awarzynski, vzakhari - - Differential Revision: https://reviews.llvm.org/D137956 - -commit 4e78f88561af26c74b4b7fa2a017cd836a9f9bf4 -Author: Jean Perier -Date: Tue Jan 10 09:28:08 2023 +0100 - - [flang] Lower addresses inside global initializers in HLFIR - - Move the code to lower an expression to address or a box in HLFIR from - Bridge.cpp to ConvertExpr.cpp so that it can be used inside - ConvertVariable.cpp (that needs to use a different symbol map that the - one held in the bridge). - - Lower NULL to hlfir.null. - - This allows lowering derived type constant structure constructors with - pointer components into fir.global. 
- - Differential Revision: https://reviews.llvm.org/D141276 - -commit c09215860fd5c32012ef4fdc5a001485a04fe85a -Author: Kazu Hirata -Date: Sat Jan 7 22:26:48 2023 -0800 - - [flang] Use std::optional instead of llvm::Optional (NFC) - - This patch replaces (llvm::|)Optional< with std::optional<. I'll post - a separate patch to remove #include "llvm/ADT/Optional.h". - - This is part of an effort to migrate from llvm::Optional to - std::optional: - - https://discourse.llvm.org/t/deprecating-llvm-optional-x-hasvalue-getvalue-getvalueor/63716 - -commit 4d4d4785e00824b8f4824d09126547379d5a2093 -Author: Kazu Hirata -Date: Sat Jan 7 20:55:47 2023 -0800 - - [flang] Add #include (NFC) - - This patch adds #include to those files containing - llvm::Optional<...> or Optional<...>. - - I'll post a separate patch to actually replace llvm::Optional with - std::optional. - - This is part of an effort to migrate from llvm::Optional to - std::optional: - - https://discourse.llvm.org/t/deprecating-llvm-optional-x-hasvalue-getvalue-getvalueor/63716 - -commit 609b789170625277f631139c790c22d527ff1eed -Author: V Donaldson -Date: Tue Jan 3 10:31:30 2023 -0800 - - [flang] Control flow graph issues - - Address several issues involving control flow graph generation and - structured code ops. - - - Fix a problem with constructs nested inside unstructured selection - constructs. This is a general problem involving branches that are - implied rather than explicit. It is addressed in the generic genFIR - "wrapper" function that calls individual statement-specific genFIR calls. - - - The previous fix requires some compensating changes in IF and DO - construct code lowering. - - - Streamline the code to generate explicit DO loop variable updates. - - - Fix a problem with the individual detailed genFIR calls made in the - genFIR(SelectTypeConstruct) call. - - - Modify control flow graph generation to support the insertion of - deallocation and finalization code when lowering most END - statements. 
- -commit a8234196c58396c0505ac93983dafee743a67b11 -Author: Peter Klausler -Date: Mon Dec 19 12:41:25 2022 -0800 - - [flang] Restore checking for some optional values before use - - Recent commits (2098ad7f00324ee0f2a6538f418a6f81dfdd2edb and - 15a9a72ee68166c0cff3f036cacd3c82be66c729) replaced usage of "o.value()" - on optionals with "*o". Those optional values are expected to be - present -- but now, if it ever turns out that they're not, - compilation will proceed with garbage data rather than crashing - immediately (and more debuggably) with an uncaught exception. - - Add asserts for presence to restore the previous level of safety. - (I could have revert these patches so as to resume used of .value() - but I didn't want to just have them get broken again.) - - Differential Revision: https://reviews.llvm.org/D140340 - -commit 93129ca8d1cf618390a16e5d4315d0fd15170c51 -Author: Jean Perier -Date: Tue Dec 20 13:49:38 2022 +0100 - - [flang] Do not convey captured globals through host link - - Addresses and properties (bounds, length parameters) of host - variables associated in an internal procedure were all passed via - an extra tuple argument of the internal procedure. - This extra tuple is in general an overhead: it must be created and - passed, and require creating thunks when taking the address of the - internal procedure. - This patch allows not using the tuple for host global variables - (from modules, common block, or local saved variables) since they can - be instantiated from the fir.global symbol in the internal procedure - instead. - Add a fir.internal_proc attribute to mlir::FuncOp for internal procedures - so that ArrayValueCopy can still detect internal procedures even if they - do not have a tuple argument. 
- - Differential Revision: https://reviews.llvm.org/D140288 - -commit fc61400cb81200198cf5b1d53cff2d29f5909800 -Author: Jean Perier -Date: Mon Dec 19 02:40:25 2022 -0800 - - [flang] Fix llvm::Optional warning caused by D140220 - - Using llvm::Optional::value() was just deprecated in LLVM. - Remove the usage that was added by D140220 and replace it by an assert. - - https://lab.llvm.org/buildbot/#/builders/160/builds/14222 - -commit 8febe67851458645f93efa33d72717b732007ca7 -Author: Jean Perier -Date: Mon Dec 19 11:11:23 2022 +0100 - - [flang] Lower statement function references in HLFIR - - Enable lowering of statement function references in HLFIR. This follows - the same principle as statement function lowering with the current - lowering: - - Actual arguments are lowered and mapped to the statement function - dummy symbols. - - "HostAssociated" symbols are mapped to their host values (these are - the symbols referred to inside the statement function expressions that - are not statement function dummies. e.g: `x` in `stmt_func(i) = - x(i)`). - - The statement function expression is evaluated. - - evaluate::SetLength has to be lowered to deal with statement functions - returning characters since the front-end is generating one to ensure the - statement function expression value is trimmed/padded to match the statement - function declared type. - - Differential Revision: https://reviews.llvm.org/D140220 - -commit 15a9a72ee68166c0cff3f036cacd3c82be66c729 -Author: Fangrui Song -Date: Sat Dec 17 22:22:47 2022 +0000 - - [flang] llvm::Optional::value() => => operator*/operator-> - - std::optional::value() has undesired exception checking semantics and is - unavailable in older Xcode (see _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS). The - call sites block std::optional migration. 
- -commit 9379ca0a257780961a7e77c1a56c70d00cd85909 -Author: Valentin Clement -Date: Thu Dec 15 12:02:11 2022 +0100 - - [flang] Fix associating entity when selector is an array, pointer or allocatable - - In SELECT TYPE, within the block following TYPE IS, the associating entity is not polymorphic. - It has the type named in the type guard and other properties taken from the - selector. Within the block following a CLASS IS type guard statement, the - associating entity is polymorphic and has the declared type named in the type - guard statement. - This patch makes sure the associating entity matches the selector if it is - an array, a pointer or an allocatable. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D140017 - -commit 518e6f12f37cc47be99c6d218bf07c0191c66de2 -Author: V Donaldson -Date: Mon Dec 12 14:20:06 2022 -0800 - - [flang] Submodules - - A submodule is a program unit that may contain the implementions of procedures - declared in an ancestor module or submodule. - - Processing for the equivalence groups and variables declared in a submodule - scope is similar to existing processing for the equivalence groups and - variables in module and procedure scopes. However, module and procedure scopes - are tied directly to code in the Pre-FIR Tree (PFT), whereas processing for a - submodule must have access to an ancestor module scope that is guaranteed - to be present in a .mod file, but is not guaranteed to be in the PFT. This - difference is accommodated by tying processing directly to a front end scope. - Function scopes that can be processed on the fly are done that way; the - resulting variable information is never stored. Module and submodule scopes - whose symbol information may be needed during lowering of any number of module - procedures are instead cached on first use, and reused as needed. - - These changes are a direct extension of current code. 
All module and submodule - variables in scope are processed, whether referenced or not. A possible - alternative would be to instead process symbols only when first used. While - this could ultimately be beneficial, such an approach must account for the - presence of equivalence groups. That information is not currently available - for on-the-fly variable processing. - - Some additional changes are needed to include submodules in places where - modules must be considered, and to include separate module procedures in - places where other subprogram variants are considered. There is also a fix - for a bug involving the use of variables in an equivalence group in a - namelist group, which also involves scope processing code. - -commit 40cb4fd0b9c9088de79435d829314fcd32ba2779 -Author: Valentin Clement -Date: Mon Dec 12 14:04:17 2022 +0100 - - [flang] Perform polymorphic pointer association with runtime call - - pointer association to a polymorphic pointer needs to potentially - update the element size in the descriptor. Update the pointer association - to polymoprhic pointer with a runtime call to PointerAssociate. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D139825 - -commit 788960d6286325c59d1e8e8e85743dafd4a61476 -Author: Jean Perier -Date: Tue Dec 6 13:53:08 2022 +0100 - - [flang] Allow conversion from hlfir.expr to fir::ExtendedValue - - For now at least, the plan is to keep hlfir.expr usage limited as - sub-expression operand, assignment rhs, and a few other contexts ( - e.g. Associate statements). The rest of lowering (statements lowering - in the bridge) will still expect to get and manipulate characters and - arrays in memory. That means that hlfir.expr must be converted to - variable in converter.genExprAddr/converter.genExprBox. - - This is done using an hlfir.associate, and generating the related - hlfir.end_associate in the statement context. 
- - hlfir::getFirBase of is updated to avoid bringing in the HLFIR - fir.boxchar/fir.box into FIR when the entity was created with - hlfir::AssociateOp. - - Differential Revision: https://reviews.llvm.org/D139328 - -commit 491b6a9ccb05e5e6784ce50718570d204378c75e -Author: Valentin Clement -Date: Mon Dec 5 18:28:13 2022 +0100 - - [flang] Fix pointer association with remap on polymorphic entities - - Runtime is expecting a 1d array. This patch fixes the generation - of the array holding the bounds to be passed to the runtime function call. - - Reviewed By: jeanPerier, PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D139324 - -commit 42b21ddaadd3545945f29a8ccdcc89779542c30e -Author: Valentin Clement -Date: Mon Dec 5 09:35:50 2022 +0100 - - [flang] Pointer assignment with remapping involcing polymorphic entities - - Lower pointer assignment with remapping involving polymorphic entities - to runtime call to PointerAssociateRemapping. - For the time being all pointer assignment involcing polymorphic entities are - done with the runtime call. When lhs is not unlimited polymorphic - we might be able to do it inlined as well. - - Reviewed By: jeanPerier, PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D139198 - -commit 19811165482d87582d31219305f946a77208a1f2 -Author: Jean Perier -Date: Mon Dec 5 09:05:37 2022 +0100 - - [flang] Lower function return to HLFIR - - The only special thing that is needed is to update the bridge symbol - lookup to deal with the HLFIR symbol lookup (symbols are mapped to - fir::FortranVariableInterface operations, not Fortran::Lower::SymbolBox). - - Differential Revision: https://reviews.llvm.org/D139201 - -commit 9a41739565d9f7ce94da5e7d83947ead73d9bd54 -Author: Kazu Hirata -Date: Sat Dec 3 12:14:21 2022 -0800 - - [flang] Use std::nullopt instead of None (NFC) - - This patch mechanically replaces None with std::nullopt where the - compiler would warn if None were deprecated. 
The intent is to reduce - the amount of manual work required in migrating from Optional to - std::optional. - - This is part of an effort to migrate from llvm::Optional to - std::optional: - - https://discourse.llvm.org/t/deprecating-llvm-optional-x-hasvalue-getvalue-getvalueor/63716 - -commit f8ea349a6d4b71e04f0eff637ee1a71bb15c8aa1 -Author: Valentin Clement -Date: Fri Dec 2 15:51:01 2022 +0100 - - [flang] Perform assignment to polymorphic allocatable with runtime call - - Lower assignment to polymorphic allocatable to the `Assign` runtime - call. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D139192 - -commit c44292f15b0ce0fa1866c80211b341733b043efb -Author: Valentin Clement -Date: Fri Dec 2 09:52:06 2022 +0100 - - [flang] Enable character type guard in select type - - SELECT TYPE lower and conversion was not handling - `character` type guard. This add support for it. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D139106 - -commit e78e4a176147a1a971f2093b3a927f51479074ab -Author: Jean Perier -Date: Thu Dec 1 11:09:35 2022 +0100 - - [flang] lower F77 calls in HLFIR - - Use recently added hlfir.associate/hlfir.end_associate to deal - with the cases where the actual argument is an expression. - - Differential Revision: https://reviews.llvm.org/D139009 - -commit 131585ab0fd0ed43d6fd7325ff75d6fa6e623a4b -Author: Valentin Clement -Date: Thu Dec 1 11:12:01 2022 +0100 - - [flang] Use genExprBox for the rhs when calling PointerAssociate for unlimited polymorphic pointer - - In D139019 the assumption was made that the rhs was also the MutableBox - but this is not a constraint. Use genExprBox instead. Also the allowed - conversion in D139019 was not correct. Remoed it since it is not needed anymore. 
- - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D139081 - -commit abefd87e706a47303905edcff031a22edf880921 -Author: Valentin Clement -Date: Wed Nov 30 18:57:28 2022 +0100 - - [flang] Delegate pointer association to class(*) pointer to the runtime - - Pointer association with an unlimited polymorphic pointer on the lhs - requires more than just updating the base_addr. Delegate the association to - the runtime function `PointerAssociation`. - - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D139019 - -commit 1bd0ff7a90593d3cf363325ff797bc5efa7928e0 -Author: Valentin Clement -Date: Wed Nov 30 15:53:01 2022 +0100 - - [flang] Allow non polymorphic pointer assignment with polymorphic rhs - - Remove the TODO and allow pointer assignment with non - polymorphic entity on the lhs. The assignment follow the same scheme - as derived-type pointer assignment to parent component. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D138998 - -commit d38735e601d97d5dad4e6c7e7452632d5954f652 -Author: Valentin Clement -Date: Tue Nov 22 15:13:18 2022 +0100 - - [flang][NFC] Switch CollectBindings return to SymbolVector - - As suggested on D138129, switching rteurn of CollectBindings - function to SymbolVector. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D138419 - -commit 81bd5e2ef7332f8ccad5ffeef931ed7d87d548e7 -Author: Valentin Clement -Date: Tue Nov 22 10:11:50 2022 +0100 - - Revert "[flang][NFC] Switch CollectBindings return to SymbolVector" - - This reverts commit 97e8eeb758fcae4f2afd9ac516ffc9509b4daaf0. - -commit 97e8eeb758fcae4f2afd9ac516ffc9509b4daaf0 -Author: Valentin Clement -Date: Tue Nov 22 09:42:32 2022 +0100 - - [flang][NFC] Switch CollectBindings return to SymbolVector - - As suggested on D138129, switching rteurn of CollectBindings - function to SymbolVector. 
- - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D138419 - -commit 6393d2ea24fb458c353f8d453ab5f20663875cb1 -Author: Valentin Clement -Date: Thu Nov 17 10:53:13 2022 +0100 - - [flang] Create fir.dispatch_table and fir.dt_entry operations - - Create the fir.dispatch_table operation based on semantics - information. The fir.dispatch_table will be used for static devirtualization - as well as for fir.select_type conversion. - - Depends on D138129 - - Reviewed By: jeanPerier, PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D138131 - -commit dd73bfa6d6da9435cef8fc1e759abd5fad32fa50 -Author: Jean Perier -Date: Tue Nov 15 12:01:21 2022 +0100 - - [flang] Lower intrinsic assignment to fir.assign - - Lower intrinsic assignment to hlfir.assign, except when the LHS - is a whole allocatable (this part will be done later to keep patch - simpler). - - Differential Revision: https://reviews.llvm.org/D138013 - -commit f677c5ee97911561c9948684029aef15b1f5cdd0 -Author: Valentin Clement -Date: Mon Nov 14 10:46:53 2022 +0100 - - [flang] Initial lowering of SELECT TYPE construct to fir.select_type operation - - This patch is the initial path to lower the SELECT TYPE construct to the - fir.select_type operation. More work is required in the AssocEntity - mapping but it will be done in a follow up patch to ease the review. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D137728 - -commit fcfb620db55ec6fde832254f38a207da24399a2d -Author: Jean Perier -Date: Mon Nov 14 10:37:04 2022 +0100 - - [flang][NFC] rename hlfir::FortranEntity into EntityWithAttributes - - This reflects the fact that Attributes will not always be visible when - looking at an HLFIR variable. The EntityWithAttributes class is used - to denote in the compiler code that the value at hand has visible - attributes. It is intended to be used in lowering so that the code - can query about operands attributes when generating code. 
- - Differential Revision: https://reviews.llvm.org/D137792 - -commit 8f3f15c1a208932689a8bdef22d6ca3d4c3408c5 -Author: Slava Zakharin -Date: Mon Nov 7 09:05:27 2022 -0800 - - [flang] Configure FirOpBuilder based on math driver options. - - Added MathOptionsBase to share fastmath config between different - components. Frontend driver translates LangOptions into MathOptionsBase. - FirConverter configures FirOpBuilder using MathOptionsBase - config passed to it via LoweringOptions. - - Depends on D137390 - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D137391 - -commit 3952377f71dc1d1aa1627c4c7f82d51163b8fa80 -Author: Jean Perier -Date: Wed Nov 2 08:36:45 2022 +0100 - - [flang] lower intrinsic constants to HLFIR - - Use the utility to lower Constant that was split from current lowering - in https://reviews.llvm.org/D136955. - - The difference in HLFIR is the addition of a fir.declare on constant - outlined in memory so that all the information about them is available. - - Lowering to HLFIR is enabled in Brideg::genExprValue to allow testing - of scalar constant lowering. - - Differential Revision: https://reviews.llvm.org/D137084 - -commit 880b37f175c7f7ce9e5684ecb2713de66f79cec7 -Author: Valentin Clement -Date: Tue Nov 1 21:46:07 2022 +0100 - - [flang] Handle pointer assignment with polymorphic entities - - This patch forces pointer and allocatable polymorphic entities to be - tracked as descriptor. It also enables the pointer assignment between - polymorphic entities. Pointer association between a non-polymorphic - pointer and a polyrmophic target might require some more work as - per 10.2.2.3 point 1. 
- - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D137150 - -commit 90e9fcbb68a3afa3ac25aa54555355634554c349 -Author: Valentin Clement -Date: Mon Oct 31 11:02:50 2022 +0100 - - [flang] Set declared type when NULLIFY a polymorphic pointer - - Fortran standard 7.3.2.3 point 7 mentions that a diassociated - pointer dynamic type is its declared type. - in 9.7.2 note 1, when a NULLIFY statement is applied to a polymorphic pointer, - its dynamic type becomes the same as its declared type. - This patch enforce these standard points by calling the runtime function - `PointerNullifyDerived` with the declared type descriptor. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D136948 - -commit c14ef2d762e4f7de2d892af111d87aec79b8cd6f -Author: Jean Perier -Date: Mon Oct 24 15:35:19 2022 +0200 - - [flang] Add kernel to lower expressions to HLFIR - - This patch adds the kernel to lower evaluate::Expr to HLFIR to a - hlfir::FortranEntity (a single mlir::Value that can be interpreted as - a Fortran variable or the value of a Fortram expression). - - It implements lowering of simple name designators ("x") and starts - adding a translation layer in AbstractConverter::genExprBox and - AbstractConverter::genExprAddr so that the new expression lowering - can be used without any changes for now in the current statement and - construct lowering. - - Differential Revision: https://reviews.llvm.org/D136453 - -commit 9e37301cf483237695325e199393ba5a84b7fc1e -Author: Jean Perier -Date: Tue Oct 18 11:07:47 2022 +0200 - - [flang][NFC] Simplify mapSymbolAttributes in symbol lowering - - mapSymbolAttributes currently has a lot of very similar code for - each kind of explicit shape and scalar symbols. - - Refactor it so that the change to lower symbols with fir.declare - can be added in centralized places instead of being scattered. - This is a preparation patch and fir.declare is not yet added. 
- - Differential Revision: https://reviews.llvm.org/D136061 - -commit 205b47401ed6517cc34e473838e1d307715cc9a4 -Author: Peixin Qiao -Date: Mon Oct 17 23:27:17 2022 +0800 - - [flang] Fix the trivial type passed as value with bind(C) - - In the callee side, the value cannot be used directly. For example, the - dummy argument is lhs variable or the dummy argument is passed to - another procedure as actual argument. - - Fix this by allocating one temporary storage and store the value. Then - map the symbol of dummy argument to the `mlir::Value` of the temporary. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D136009 - -commit 4546397e39589f0a6a707218349d1bf65fe54645 -Author: Jean Perier -Date: Mon Oct 17 09:57:16 2022 +0200 - - [flang] Introduce option to lower expression to HLFIR - - Preliminary work on HLFIR. Introduce option that will allow testing - lowering via HLFIR until this is ready to replace the current expression - lowering. - - See https://reviews.llvm.org/D134285 for more context about the plan. - - Differential Revision: https://reviews.llvm.org/D135959 - -commit 0ec3ac9b7fbd15698af7289e1214e8ff3d82ec14 -Author: Jonathon Penix -Date: Tue Jul 19 11:47:25 2022 -0700 - - [Flang] Add -fconvert option to swap endianness for unformatted files. - - To accomplish this, this patch creates an optional list of environment - variable default values to be set by the runtime to allow directly using - the existing runtime implementation of FORT_CONVERT for I/O conversions. - -commit f4accbf55f4d0fcd6d7cc6f7632a0e4b69c9f3dd -Author: Peixin Qiao -Date: Wed Oct 5 20:22:33 2022 +0800 - - [flang][OpenMP] Support privatization for single construct - - This supports the lowering of private and firstprivate clauses in single - construct. The alloca ops are emitted in the entry block according to - https://llvm.org/docs/Frontend/PerformanceTips.html#use-of-allocas, and - the load/store ops are emitted in the single region. 
The data race - problem is handled in OMPIRBuilder. That is, the barrier is emitted in - OMPIRBuilder. - - Co-authored-by: Nimish Mishra - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D128596 - -commit de3efd1b4c8e120c37b24e7cc264b5a117641bb1 -Author: Valentin Clement -Date: Sat Sep 24 08:58:50 2022 +0200 - - [flang] Lower character result of bind(c) function by value - - BIND(C) Function returning character must return it by value and - not as hidden argument like done currently. This patch update the - code to return it by value for both use cases. - - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D134530 - -commit e6238ab52590a4f31bc8c6e806c7947e9ef04b57 -Author: Jean Perier -Date: Tue Sep 20 10:39:39 2022 +0200 - - [flang] Deallocate WHERE masks after all assignments - - Allocatable assignments were triggering lowering to clean-up - any WHERE mask temporaries, even if some assignments where left - in the WHERE construct. - - This is because allocatable assignments lowering was being passed the - wrong statement context. Fix this by selecting the where/forall statement - context instead of a local one when there is one. - - Differential Revision: https://reviews.llvm.org/D134197 - -commit 78c40b3c53ccfa272528d7d4f03d5f25d0b7806e -Author: V Donaldson -Date: Wed Sep 7 21:22:59 2022 -0700 - - [flang] Control flow with empty select case blocks - - Fix control flow for empty select case blocks such as: - - select case (2) - case (1) - print*, '1' - case (2) - ! print*, '2' - case default - print*, 'default' - end select - -commit 109f9a291850a8e82f5026f68a382222a235c4f3 -Author: Peixin Qiao -Date: Tue Sep 6 08:15:01 2022 +0800 - - [flang] Support lowering of intrinsic module procedure C_F_POINTER - - As Fortran 2018 18.2.3.3, the intrinsic module procedure - C_F_POINTER(CPTR, FPTR [, SHAPE]) associates a data pointer with the - target of a C pointer and specify its shape. 
CPTR shall be a scalar of - type C_PTR, and its value is the C address or the result of a reference - to C_LOC. FPTR is one pointer, either scalar or array. SHAPE is a - rank-one integer array, and it shall be present if and only if FPTR is - an array. - - C_PTR is the derived type with only one component of integer 64, and the - integer 64 component value is the address. Build the right "source" - fir::ExtendedValue based on the address and shape, and use - associateMutableBox to associate the pointer with the target of the C - pointer. - - Refactor the getting the address of C_PTR to reuse the code. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D132303 - -commit 8fbc7e0869293b9b61b7bce3edfa4de05b1c549d -Author: Slava Zakharin -Date: Thu Sep 1 12:33:39 2022 -0700 - - [flang] Make use of do variable more consistent. - - Instead of using the IV block argument of the do-loop we will use - the do-variable value loaded from its location. This usage is consistent - with other uses of the do-variable inside the loop. - - Differential Revision: https://reviews.llvm.org/D133140 - -commit 4943dbdf67bad8ddb6dbb6e31e4ce9a80ffd9097 -Author: Peixin Qiao -Date: Mon Aug 29 22:29:34 2022 +0800 - - [flang] Support lowering of C_PTR and C_FUNPTR argument with VALUE attribute - - As Fortran 2018 18.3.2, C_PTR is interoperable with any C object pointer - type. C_FUNPTR is interoperable with any C function pointer type. As - 18.3.6, a C pointer can correspond to a Fortran dummy argument of type - C_PTR with the VALUE attribute. - - The interface for type(C_PTR)/type(C_FUNPTR) argument with value - attribute is different from the the usual derived type. For type(C_PTR) - or type(C_FUNPTR), the component is the address, and the interface is - a pointer even with VALUE attribute. For a usual derived type such as - the drived type with the component of integer 64, the interface is a i64 - value when it has VALUE attribute on aarch64 linux. 
- - To lower the type(C_PTR)/type(C_FUNPTR) argument with value attribute, - get the value of the component of the type(C_PTR)/type(C_FUNPTR), which - is the address, and then convert it to the pointer and pass it. - - Reviewed By: Jean Perier - - Differential Revision: https://reviews.llvm.org/D131583 - -commit af7edf1557d87026bb4dd4783f60e766538e923c -Author: Slava Zakharin -Date: Thu Aug 18 14:06:19 2022 -0700 - - [flang] Keep original data type for do-variable value. - - Keep the original data type of integer do-variables - for structured loops. When do-variable's data type - is an integer type shorter than IndexType, processing - the do-variable separately from the DoLoop's iteration index - allows getting rid of type casts, which can make backend - optimizations easier. - - For example, - ``` - do i = 2, n-1 - do j = 2, n-1 - ... = a(j-1, i) - end do - end do - ``` - - If value of 'j' is computed by casting the DoLoop's iteration - index to 'i32', then Flang will produce the following LLVM IR: - ``` - %1 = trunc i64 %iter_index to i32 - %2 = sub i32 %1, 1 - %3 = sext i32 %2 to i64 - ``` - - LLVM's InstCombine may try to get rid of the sign extension, - and may transform this into: - ``` - %1 = shl i64 %iter_index, 32 - %2 = add i64 %1, -4294967296 - %3 = ashr exact i64 %2, 32 - ``` - - The extra computations for the element address applied on top - of this awkward pattern confuse LLVM vectorizer so that - it does not recognize the unit-strided access of 'a'. 
- - Measured performance improvements on `SPEC CPU2000@IceLake`: - ``` - 168.wupwise: 11.96% - 171.swim: 11.22% - 172.mrgid: 56.38% - 178.galgel: 7.29% - 301.apsi: 8.32% - ``` - - Differential Revision: https://reviews.llvm.org/D132176 - -commit 06b551c944ff1cb4a21ca39c9e5ee6f67fc282ee -Author: Kazu Hirata -Date: Sat Aug 20 21:18:27 2022 -0700 - - Use llvm::is_contained (NFC) - -commit 83fa97567949ce16d58b62ecff48930efb1d80bb -Author: Valentin Clement -Date: Sat Aug 13 20:40:03 2022 +0200 - - [flang][openacc] Handle array section and derived-type components operands - - This patch lowers correctly operands with array section - and derived-type component. - - Depends on D131764 - - Reviewed By: razvanlupusoru - - Differential Revision: https://reviews.llvm.org/D131765 - -commit 8fc00247cea5ac8290657867581e22b876a09d3b -Author: Valentin Clement -Date: Fri Aug 12 21:22:30 2022 +0200 - - [flang] Pass SemanticsContext to the LoweringBridge - - The SemanticsContext is needed to analyze expression later in the - lowering for directive languages. This patch allows to keep a reference of - the SemanticsContext in the LoweringBridge. - - Building block for D131765 - - Reviewed By: razvanlupusoru - - Differential Revision: https://reviews.llvm.org/D131764 - -commit 435feefbdd6c91faf24fa5e69c4e7c3bc127568a -Author: Nimish Mishra -Date: Fri Aug 12 16:46:26 2022 +0530 - - [flang][OpenMP] Lowering support for default clause - - This patch adds lowering support for default clause. - - 1. During symbol resolution in semantics, should the enclosing context - have a default data sharing clause defined and a `parser::Name` is not - attached to an explicit data sharing clause, the - `semantics::Symbol::Flag::OmpPrivate` flag (in case of - default(private)) and `semantics::Symbol::Flag::OmpFirstprivate` flag - (in case of default(firstprivate)) is added to the symbol. - - 2. 
During lowering, all symbols having either - `semantics::Symbol::Flag::OmpPrivate` or - `semantics::Symbol::Flag::OmpFirstprivate` flag are collected and - privatised appropriately. - - Co-authored-by: Peixin Qiao - - Reviewed by: peixin - - Differential Revision: https://reviews.llvm.org/D123930 - -commit 30b779d515e6aa9516f47d8c0ca54b11aaf98cb2 -Author: Nimish Mishra -Date: Fri Aug 12 16:45:38 2022 +0530 - - Revert "[flang][OpenMP] Lowering support for default clause" - - This reverts commit 6a305c9b49dd28eaeae694fce5755e279fbc884c. - -commit 6a305c9b49dd28eaeae694fce5755e279fbc884c -Author: Nimish Mishra -Date: Fri Aug 12 22:05:43 2022 +0530 - - [flang][OpenMP] Lowering support for default clause - - This patch adds lowering support for default clause. - - 1. During symbol resolution in semantics, should the enclosing context have - a default data sharing clause defined and a `parser::Name` is not attached - to an explicit data sharing clause, the - `semantics::Symbol::Flag::OmpPrivate` flag (in case of default(private)) - and `semantics::Symbol::Flag::OmpFirstprivate` flag (in case of - default(firstprivate)) is added to the symbol. - - 2. During lowering, all symbols having either - `semantics::Symbol::Flag::OmpPrivate` or - `semantics::Symbol::Flag::OmpFirstprivate` flag are collected and - privatised appropriately. - - Co-authored-by: Peixin Qiao - - Reviewed by: peixin - - Differential Revision: https://reviews.llvm.org/D123930 - -commit f1eb945f9a5037b1fac6da02405047b24c0c2de5 -Author: Slava Zakharin -Date: Tue Jul 19 20:39:58 2022 -0700 - - [flang] Propagate lowering options from driver. - - This commit addresses concerns raised in D129497. - - Propagate lowering options from driver to expressions lowering - via AbstractConverter instance. A single use case so far is - using optimized TRANSPOSE lowering with O1/O2/O3. - - bbc does not support optimization level switches, so it uses - default LoweringOptions (e.g. 
optimized TRANSPOSE lowering - is enabled by default, but an engineering -opt-transpose=false - option can still override this). - - Differential Revision: https://reviews.llvm.org/D130204 - -commit 4640a8a22cbae50c03c5f41d145916258a17a491 -Author: Peixin Qiao -Date: Wed Jul 27 23:37:55 2022 +0800 - - [NFC][flang] Add FIXME for privatization of loop bounds in Bridge.cpp - - There is post comment of adding TODO/FIXME for privatization of loop - bounds in D127137. D127137 fixes the bug in OpenMP firstprivate clause, - which should be refactored later according to the post comment. Add - FIXME for it. - - Differential Revision: https://reviews.llvm.org/D130625 - -commit 7ac2500eb05fde69235a4dfbc435dc9362fbcd83 -Author: Peixin Qiao -Date: Wed Jul 27 23:17:54 2022 +0800 - - [NFC][flang] Remove the unused header in Bridge.cpp - - The header file OpenMPDialect.h is added in Bridge.cpp in D130027, - but it is unused. Remove it. - - Differential Revision: https://reviews.llvm.org/D130625 - -commit f2b7f18e6375bb7eb6232f35c9ca30da8e20d7c7 -Author: Shraiysh Vaishay -Date: Tue Jul 26 19:18:27 2022 +0530 - - Revert "[flang][OpenMP] Lowering support for default clause" - - This reverts commit 05e6fce84fd39d150195b8928561f2c90c71e538. - -commit 05e6fce84fd39d150195b8928561f2c90c71e538 -Author: Nimish Mishra -Date: Tue Jul 26 14:08:34 2022 +0530 - - [flang][OpenMP] Lowering support for default clause - - This patch adds lowering support for default clause. - - 1. During symbol resolution in semantics, should the enclosing context have - a default data sharing clause defined and a `parser::Name` is not attached - to an explicit data sharing clause, the - `semantics::Symbol::Flag::OmpPrivate` flag (in case of `default(private)`) - and `semantics::Symbol::Flag::OmpFirstprivate` flag (in case of - `default(firstprivate)`) is added to the symbol. - - 2. 
During lowering, all symbols having either - `semantics::Symbol::Flag::OmpPrivate` or - `semantics::Symbol::Flag::OmpFirstprivate` flag are collected and - privatised appropriately. - - Co-authored-by: Peixin Qiao - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D123930 - -commit 3356d72a5ffa69324f8fdbc067c440cda8154797 -Author: Kazu Hirata -Date: Mon Jul 25 23:01:01 2022 -0700 - - [flang] Use value or * instead of getValue (NFC) - - This patch replaces x.getValue() with *x if the reference is obviously - protected by a presence check. Otherwise, it replaces x.getValue() - with x.value(). - -commit 17d9bdf4601983491d2b6f28e2b6b5a36f2688c7 -Author: Arnamoy Bhattacharyya -Date: Mon Jul 25 20:31:23 2022 -0400 - - [Flang][OpenMP] Add support for lastprivate clause for worksharing loop. - - This patch adds an initial support to the lastprivate clause for worksharing loop. The patch creates necessary control flow to guarantee the store of the value from the logical last iteration of the workshare loop. - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D130027 - -commit 7bb1151ba21e26d91ddaa83177bb58b4d1c36710 -Author: Kiran Chandramohan -Date: Mon Jul 25 18:21:17 2022 +0000 - - [Flang][OpenMP] Initial support for integer reduction in worksharing-loop - - Lower the Flang parse-tree containing OpenMP reductions to the OpenMP - dialect. The OpenMP dialect models reductions with, - 1) A reduction declaration operation that specifies how to initialize, combine, - and atomically combine private reduction variables. - 2) The OpenMP operation (like wsloop) that supports reductions has an array of - reduction accumulator variables (operands) and an array attribute of the same - size that points to the reduction declaration to be used for the reduction - accumulation. - 3) The OpenMP reduction operation that takes a value and an accumulator. 
- This operation replaces the original reduction operation in the source. - - (1) is implemented by the `createReductionDecl` in OpenMP.cpp, - (2) is implemented while creating the OpenMP operation, - (3) is implemented by the `genOpenMPReduction` function in OpenMP.cpp, and - called from Bridge.cpp. The implementation of (3) is not very robust. - - NOTE 1: The patch currently supports only reductions for integer type addition. - NOTE 2: Only supports reduction in the worksharing loop. - NOTE 3: Does not generate atomic combination region. - NOTE 4: Other options for creating the reduction operation include - a) having the reduction operation as a construct containing an assignment - and then handling it appropriately in the Bridge. - b) we can modify `genAssignment` or `genFIR(AssignmentStmt)` in the Bridge to - handle OpenMP reduction but so far we have tried not to mix OpenMP - and non-OpenMP code and this will break that. - I will try (b) in a separate patch. - NOTE 5: OpenMP dialect gained support for reduction with the patches: - D105358, D107343. See https://discourse.llvm.org/t/rfc-openmp-reduction-support/3367 - for more details. - - Reviewed By: awarzynski - - Differential Revision: https://reviews.llvm.org/D130077 - - Co-authored-by: Peixin-Qiao - -commit d507e8b70e4668f891d5df03f966c154cc4d5370 -Author: Arnamoy Bhattacharyya -Date: Mon Jul 11 09:01:15 2022 -0400 - - [flang][OpenMP] Fix firstprivate bug - - In case where the bound(s) of a workshare loop use(s) firstprivate var(s), currently, that use is not updated with the created clone. It still uses the shared variable. This patch fixes that. - - Reviewed By: peixin - - Differential Revision: https://reviews.llvm.org/D127137 - -commit 53804e426d9b552adaa1adb86a2df9014c41d42a -Author: Valentin Clement -Date: Thu Jul 7 09:37:12 2022 +0200 - - [flang][NFC] Make LEN parameters homogenous - - This patch is part of the upstreaming effort from fir-dev branch. 
- This is the last patch for the upstreaming effort. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D129187 - - Co-authored-by: Eric Schweitz - -commit 23c2bedfd93cfacc62009425c464e659a34e92e6 -Author: Peter Klausler -Date: Fri Jul 1 11:40:44 2022 -0700 - - [flang] Establish a single source of target information for semantics - - Create a TargetCharacteristics class to centralize the few items of - target specific information that are relevant to semantics. Use the - new class for all target queries, including derived type component layout - modeling. - - Future work will initialize this class with target information - provided or forwarded by the drivers, and use it to fold layout-dependent - intrinsic functions like TRANSFER(). - - Differential Revision: https://reviews.llvm.org/D129018 - - Updates: Attempts to work around build issues on Windows. - -commit 0dd4fb0408ed980de2c100905e68a4adf9987c61 -Author: Valentin Clement -Date: Fri Jul 1 10:36:45 2022 +0200 - - [flang] Fix for broken/degenerate forall case - - Fix for broken/degenerate forall case where there is no assignment to an - array under the explicit iteration space. While this is a multiple - assignment, semantics only raises a warning. - The fix is to add a test that the explicit space has any sort of array - to be updated, and if not then the do_loop nest will not require a - terminator to forward array values to the next iteration. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D128973 - - Co-authored-by: Eric Schweitz - -commit 39377d52273edb53a371f32a862df82f6b7f239d -Author: Valentin Clement -Date: Fri Jul 1 08:29:19 2022 +0200 - - [flang] Fix APFloat conversion cases - - This patch is part of the upstreaming effort from fir-dev branch. 
- - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D128935 - - Co-authored-by: Eric Schweitz - Co-authored-by: Peter Steinfeld - -commit a19c2132122e78051bfc98c304afb528b7ca14ec -Author: Valentin Clement -Date: Thu Jun 30 16:56:29 2022 +0200 - - [flang][NFC] Fix warning - -commit 1e55ec6666fa687b1a86bdaa95ea814557855fd1 -Author: Valentin Clement -Date: Thu Jun 30 09:03:49 2022 +0200 - - [flang] SELECT CASE constructs with character selectors that require a temp - - Here is a character SELECT CASE construct that requires a temp to hold the - result of the TRIM intrinsic call: - - ``` - module m - character(len=6) :: s - contains - subroutine sc - n = 0 - if (lge(s,'00')) then - select case(trim(s)) - case('11') - n = 1 - case default - continue - case('22') - n = 2 - case('33') - n = 3 - case('44':'55','66':'77','88':) - n = 4 - end select - end if - print*, n - end subroutine - end module m - ``` - - This SELECT CASE construct is implemented as an IF/ELSE-IF/ELSE comparison - sequence. The temp must be retained until some comparison is successful. - At that point the temp may be freed. Generalize statement context processing - to allow multiple finalize calls to do this, such that the program always - executes exactly one freemem call. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: klausler, vdonaldson - - Differential Revision: https://reviews.llvm.org/D128852 - - Co-authored-by: V Donaldson - -commit 3b7c3a654c9175f41ac871a937cbcae73dfb3c5d -Author: Kazu Hirata -Date: Sat Jun 25 11:56:50 2022 -0700 - - Revert "Don't use Optional::hasValue (NFC)" - - This reverts commit aa8feeefd3ac6c78ee8f67bf033976fc7d68bc6d. 
- -commit aa8feeefd3ac6c78ee8f67bf033976fc7d68bc6d -Author: Kazu Hirata -Date: Sat Jun 25 11:55:57 2022 -0700 - - Don't use Optional::hasValue (NFC) - -commit 27afb362b1e85dac21744b95ed9b48f7e9fd016c -Author: Peixin-Qiao -Date: Fri Jun 24 15:33:09 2022 +0800 - - [flang][OpenMP] Initial support the lowering of copyin clause - - This supports the lowering of copyin clause initially. The pointer, - allocatable, common block, polymorphic varaibles will be supported - later. - - This also includes the following changes: - - 1. Resolve the COPYIN clause and make the entity as host associated. - - 2. Fix collectSymbolSet by adding one option to control collecting the - symbol itself or ultimate symbol of it so that it can be used - explicitly differentiate the host and associated variables in - host-association. - - 3. Add one helper function `lookupOneLevelUpSymbol` to differentiate the - usage of host and associated variables explicitly. The previous - lowering of firstprivate depends on the order of - `createHostAssociateVarClone` and `lookupSymbol` of host symbol. With - this fix, this dependence is removed. - - 4. Reuse `copyHostAssociateVar` for copying operation of COPYIN clause. - - Reviewed By: kiranchandramohan, NimishMishra - - Differential Revision: https://reviews.llvm.org/D127468 - -commit ab89c132b547951945788fc2a0969cf64b0df4cd -Author: Valentin Clement -Date: Thu Jun 23 14:57:24 2022 +0200 - - [flang] Add lowering TODO for separate module procedures - - MODULE FUNCTION and MODULE SUBROUTINE currently cause lowering crash: - "symbol is not mapped to any IR value" because special care is needed - to handle their interface. - - Add a TODO for now. - - Example of program that crashed and will hit the TODO: - - ``` - module mod - interface - module subroutine sub - end subroutine - end interface - contains - module subroutine sub - x = 42 - end subroutine - end module - ``` - - This patch is part of the upstreaming effort from fir-dev branch. 
- - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D128412 - - Co-authored-by: Jean Perier - -commit ed8fceaa09cd66324c6efc1070f962731a62e2dc -Author: Kazu Hirata -Date: Mon Jun 20 23:35:53 2022 -0700 - - Don't use Optional::getValue (NFC) - -commit 5413bf1bac2abb9e06901686cdc959e92940143a -Author: Kazu Hirata -Date: Mon Jun 20 11:33:56 2022 -0700 - - Don't use Optional::hasValue (NFC) - -commit 331145e6e979ddb115e3bfd44d282828994d4e23 -Author: Valentin Clement -Date: Mon Jun 20 15:43:44 2022 +0200 - - [flang][NFC] Unify todo messages - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D128186 - - Co-authored-by: Peter Steinfeld - -commit 84b9ae662419ce97b3cb13879be431f6a0c9eaa4 -Author: Mats Petersson -Date: Tue Jun 7 14:00:08 2022 +0100 - - [flang]Add support for do concurrent - - [flang]Add support for do concurrent - - Upstreaming from fir-dev on https://github.com/flang-compiler/f18-llvm-project - - Support for concurrent execution in do-loops. - - A selection of tests are also added. - - Co-authored-by: V Donaldson - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D127240 - -commit 494cd9b6fc7ff3e9d9bfad08cfd329763b799749 -Author: Andrzej Warzynski -Date: Fri Jun 10 15:26:13 2022 +0000 - - [flang][lowering] Ignore compiler directives - - This patch simply replaces a `TODO` with a warning. - - This is part of the upstreaming effort from the `fir-dev` branch in [1]. 
- - [1] https://github.com/flang-compiler/f18-llvm-project - - Co-authored-by: Eric Schweitz - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D127415 - -commit 5b66cc1000f999e3fbf463a1a722249b8a6e51cb -Author: Valentin Clement -Date: Fri Jun 10 08:50:40 2022 +0200 - - [flang][NFC] Move Todo.h from Lower to Optimizer - - Remove a backwards dependence from Optimizer -> Lower by moving Todo.h - to the optimizer and out of lowering. - - This patch is part of the upstreaming effort from fir-dev branch. - - Co-authored-by: Eric Schweitz - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D127292 - -commit 7eecfc077f36fe249d5457e2d9a0e294cb25d615 -Author: Kiran Chandramohan -Date: Tue Jun 7 09:57:38 2022 +0000 - - [Flang] Add flag dependent code to execute the loop-body atleast once - - Given the flag `--always-execute-loop-body` the compiler emits code - to execute the body of the loop atleast once. - - Note: This is part of upstreaming from the fir-dev branch of - https://github.com/flang-compiler/f18-llvm-project. - - Reviewed By: awarzynski, schweitz - - Differential Revision: https://reviews.llvm.org/D127128 - - Co-authored-by: Jean Perier - Co-authored-by: Eric Schweitz - Co-authored-by: V Donaldson - Co-authored-by: Valentin Clement - Co-authored-by: Sameeran Joshi - -commit 411bd2d40788c8cb869dc4fdc37e01a57213cda9 -Author: Peixin-Qiao -Date: Tue Jun 7 15:08:17 2022 +0800 - - [flang][OpenMP] Support lowering parse-tree to MLIR for threadprivate directive - - This supports lowering parse-tree to MLIR for threadprivate directive - following the OpenMP 5.1 [2.21.2] standard. 
Take the following as an - example: - - ``` - program m - integer, save :: i - !$omp threadprivate(i) - call sub(i) - !$omp parallel - call sub(i) - !$omp end parallel - end - ``` - ``` - func.func @_QQmain() { - %0 = fir.address_of(@_QFEi) : !fir.ref - %1 = omp.threadprivate %0 : !fir.ref -> !fir.ref - fir.call @_QPsub(%1) : (!fir.ref) -> () - omp.parallel { - %2 = omp.threadprivate %0 : !fir.ref -> !fir.ref - fir.call @_QPsub(%2) : (!fir.ref) -> () - omp.terminator - } - return - } - ``` - - A threadprivate operation (omp.threadprivate) is created for all - references to a threadprivate variable. The runtime will appropriately - return a threadprivate var (%1 as above) or its copy (%2 as above) - depending on whether it is outside or inside a parallel region. For - threadprivate access outside the parallel region, the threadprivate - operation is created in instantiateVar. Inside the parallel region, it - is created in createBodyOfOp. - - One new utility function collectSymbolSet is created for collecting - all the variables with a property within a evaluation, which may be one - Fortran, or OpenMP, or OpenACC construct. - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D124226 - -commit 8c349d707ec2677e6235b4e9e3efa1e0c4de11f2 -Author: Kiran Chandramohan -Date: Wed Jun 1 11:48:20 2022 +0000 - - [Flang] Lower the infinite do loop - - The basic infinite loop is lowered to a branch to the body of the - loop, and the body containing a back edge as its terminator. - - Note: This is part of upstreaming from the fir-dev branch of - https://github.com/flang-compiler/f18-llvm-project. 
- - Reviewed By: rovka - - Differential Revision: https://reviews.llvm.org/D126697 - - Co-authored-by: Eric Schweitz - Co-authored-by: V Donaldson - -commit a159128811ae1d0a781dbc8590fa1e0f26642f82 -Author: Diana Picus -Date: Tue May 31 10:55:56 2022 +0000 - - [flang] Upstream lowering of real control loops - - Upstream the code for handling loops with real control variables from - the fir-dev branch at - https://github.com/flang-compiler/f18-llvm-project/tree/fir-dev/ - - Also add a test. - - Loops with real-valued control variables are always lowered to - unstructured loops. The real-valued control variables are handled the - same as integer ones, the only difference is that they need to use - floating point instructions instead of the integer equivalents. - - Co-authored-by: V Donaldson - -commit 11fb1aa5a40885188b014b3ccd326cc92e4a3b9e -Author: Diana Picus -Date: Wed May 25 12:51:10 2022 +0000 - - [flang] Upstream the lowering of the while loop - - Upstream the code for handling while loops from the fir-dev branch at - https://github.com/flang-compiler/f18-llvm-project/tree/fir-dev/ - - Also add tests. - - The while loop is lowered to a header block that checks the loop - condition and branches either to the exit block or to the body of the - loop. The body of the loop will unconditionally branch back to the - header. - - Differential Revision: https://reviews.llvm.org/D126636 - - Co-authored-by: Eric Schweitz - Co-authored-by: V Donaldson - -commit 1e1f60c605a9b1c803f3bbb1a1339c9bb1af4e34 -Author: V Donaldson -Date: Tue May 24 10:06:24 2022 -0700 - - [flang] Alternate entry points with unused arguments - - A dummy argument in an entry point of a subprogram with multiple - entry points need not be defined in other entry points. It is only - legal to reference such an argument when calling an entry point that - does have a definition. An entry point without such a definition - needs a local "substitute" definition sufficient to generate code. 
- It is nonconformant to reference such a definition at runtime. - Most such definitions and associated code will be deleted as dead - code at compile time. However, that is not always possible, as in - the following code. This code is conformant if all calls to entry - point ss set m=3, and all calls to entry point ee set n=3. - - subroutine ss(a, b, m, d, k) ! no x, y, n - integer :: a(m), b(a(m)), m, d(k) - integer :: x(n), y(x(n)), n - integer :: k - 1 print*, m, k - print*, a - print*, b - print*, d - if (m == 3) return - entry ee(x, y, n, d, k) ! no a, b, m - print*, n, k - print*, x - print*, y - print*, d - if (n /= 3) goto 1 - end - - integer :: xx(3), yy(5), zz(3) - xx = 5 - yy = 7 - zz = 9 - call ss(xx, yy, 3, zz, 3) - call ss(xx, yy, 3, zz, 3) - end - - Lowering currently generates fir::UndefOp's for all unused arguments. - This is usually ok, but cases such as the one here incorrectly access - unused UndefOp arguments for m and n from an entry point that doesn't - have a proper definition. - - The problem is addressed by creating a more complete definition of an - unused argument in most cases. This is implemented in large part by - moving the definition of an unused argument from mapDummiesAndResults - to mapSymbolAttributes. The code in mapSymbolAttributes then chooses - one of three code generation options, depending on information - available there. - - This patch deals with dummy procedures in alternate entries, and adds - a TODO for procedure pointers (the PFTBuilder is modified to analyze - procedure pointer symbol so that they are not silently ignored, and - instead hits proper TODOs). - - BoxAnalyzer is also changed because assumed-sized arrays were wrongfully - categorized as constant shape arrays. This had no impact, except when - there were unused entry points. 
- - Co-authored-by: jeanPerier - - Differential Revision: https://reviews.llvm.org/D125867 - -commit 3b390a1682232a0d6921692f72fac65ec4374597 -Author: Mats Petersson -Date: Wed Jul 7 16:58:32 2021 +0100 - - [flang][OpenMP] Support for Collapse - - Convert Fortran parse-tree into MLIR for collapse-clause. - - Includes simple Fortran to LLVM-IR test, with auto-generated - check-lines (some of which have been edited by hand). - - Reviewed By: kiranchandramohan, shraiysh, peixin - - Differential Revision: https://reviews.llvm.org/D125302 - -commit 1bffc75383a2285e69deda90cd10860769485234 -Author: Eric Schweitz -Date: Fri Apr 22 13:59:17 2022 -0700 - - Upstream support for POINTER assignment in FORALL. - - Reviewed By: vdonaldson, PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D125140 - -commit b85c39dd007858aac3edd915d802ff191bd58fe3 -Author: Kiran Chandramohan -Date: Fri May 6 11:45:18 2022 +0000 - - [Flang][OpenMP] Initial lowering of the OpenMP worksharing loop - - The OpenMP worksharing loop operation in the dialect is a proper loop - operation and not a container of a loop. So we have to lower the - parse-tree OpenMP loop construct and the do-loop inside the construct - to a omp.wsloop operation and there should not be a fir.do_loop inside - it. This is achieved by skipping fir.do_loop creation and calling genFIR - for the nested evaluations in the lowering of the do construct. - - Note: Handling of more clauses, parallel do, storage of loop index variable etc will come in separate patches. - - Part of the upstreaming effort to move LLVM Flang from fir-dev branch of - https://github.com/flang-compiler/f18-llvm-project to the LLVM Project. 
- - Reviewed By: peixin - - Differential Revision: https://reviews.llvm.org/D125024 - - Co-authored-by: Sourabh Singh Tomar - Co-authored-by: Shraiysh Vaishay - -commit aa0e167fab88cbef4d3f301534c86d143f2274ec -Author: Kiran Chandramohan -Date: Fri May 6 09:09:01 2022 +0000 - - [Flang] Lower Unstructured do loops - - The FIR `do_loop` is designed as a structured operation with a single - block inside it. Presence of unstructured constructs like jumps, exits - inside the loop will cause the loop to be marked as unstructured. These - loops are lowered using the `control-flow` dialect branch operations. - - Fortran semantics do not allow the loop variable to be modified inside - the loop. To prevent accidental modification, the iteration of the - loop is modeled by two variables, trip-count and loop-variable. - -> The trip-count and loop-variable are initialized in the pre-header. - The trip-count is set as (end-start+step)/step where end, start and - step have the usual meanings. The loop-variable is initialized to start. - -> The header block contains a conditional branch instruction which - selects between branching to the body of the loop or the exit block - depending on the value of the trip-count. - -> Inside the body, the trip-count is decremented and the loop-variable - incremented by the step value. Finally it branches to the header of the - loop. - - Part of the upstreaming effort to move LLVM Flang from fir-dev branch of - https://github.com/flang-compiler/f18-llvm-project to the LLVM Project. 
- - Reviewed By: awarzynski - - Differential Revision: https://reviews.llvm.org/D124837 - - Co-authored-by: Val Donaldson - Co-authored-by: Eric Schweitz - Co-authored-by: Jean Perier - Co-authored-by: Peter Klausler - -commit 2c8cb9acb51e2fa74bf9339ddd0884ef9d921dfc -Author: Jean Perier -Date: Fri Apr 29 14:52:27 2022 +0200 - - [flang] Handle common block with different sizes in same file - - Semantics is not preventing a named common block to appear with - different size in a same file (named common block should always have - the same storage size (see Fortran 2018 8.10.2.5), but it is a common - extension to accept different sizes). - - Lowering was not coping with this well, since it just use the first - common block appearance, starting with BLOCK DATAs to define common - blocks (this also was an issue with the blank common block, which can - legally appear with different size in different scoping units). - - Semantics is also not preventing named common from being initialized - outside of a BLOCK DATA, and lowering was dealing badly with this, - since it only gave an initial value to common blocks Globals if the - first common block appearance, starting with BLOCK DATAs had an initial - value. - - Semantics is also allowing blank common to be initialized, while - lowering was assuming this would never happen, and was never creating - an initial value for it. - - Lastly, semantics was not complaining if a COMMON block was initialized - in several scoping unit in a same file, while lowering can only generate - one of these initial value. - - To fix this, add a structure to keep track of COMMON block properties - (biggest size, and initial value if any) at the Program level. Once the - size of a common block appearance is know, the common block appearance - is checked against this information. 
It allows semantics to emit an error - in case of multiple initialization in different scopes of a same common - block, and to warn in case named common blocks appears with different - sizes. Lastly, this allows lowering to use the Program level info about - common blocks to emit the right GlobalOp for a Common Block, regardless - of the COMMON Block appearances order: It emits a GlobalOp with the - biggest size, whose lowest bytes are initialized with the initial value - if any is given in a scope where the common block appears. - - Lowering is updated to go emit the common blocks before anything else so - that the related GlobalOps are available when lowering the scopes where - common block appear. It is also updated to not assume that blank common - are never initialized. - - Differential Revision: https://reviews.llvm.org/D124622 - -commit b5b3e50f65ee99257041723e7645d44c1aeb1117 -Author: Kiran Chandramohan -Date: Thu Apr 28 12:20:11 2022 +0000 - - [Flang] Initial lowering of the Fortran Do loop - - This patch adds code to lower simple Fortran Do loops with loop control. - Lowering is performed by the the `genFIR` function when called with a - `Fortran::parser::DoConstruct`. `genFIR` function calls `genFIRIncrementLoopBegin` - then calls functions to lower the body of the loop and finally calls - the function `genFIRIncrementLoopEnd`. `genFIRIncrementLoopBegin` is - responsible for creating the FIR `do_loop` as well as storing the value of - the loop index to the loop variable. `genFIRIncrementLoopEnd` returns - the incremented value of the loop index and also stores the index value - outside the loop. This is important since the loop variable can be used - outside the loop. Information about a loop is collected in a structure - `IncrementLoopInfo`. 
- - Note 1: Future patches will bring in lowering for unstructured, - infinite, while loops - Note 2: This patch is part of upstreaming code from the fir-dev branch of - https://github.com/flang-compiler/f18-llvm-project. - - Reviewed By: awarzynski - - Differential Revision: https://reviews.llvm.org/D124277 - - Co-authored-by: Eric Schweitz - Co-authored-by: Jean Perier - Co-authored-by: Val Donaldson - Co-authored-by: Peter Klausler - Co-authored-by: Valentin Clement - -commit b6b8d34554a4d85ec064463b54a27e073c42beeb -Author: Peixin-Qiao -Date: Thu Apr 28 09:40:30 2022 +0800 - - [flang] Add lowering stubs for OpenMP/OpenACC declarative constructs - - This patch provides the basic infrastructure for lowering declarative - constructs for OpenMP and OpenACC. - - This is part of the upstreaming effort from the fir-dev branch in [1]. - [1] https://github.com/flang-compiler/f18-llvm-project - - Reviewed By: kiranchandramohan, shraiysh, clementval - - Differential Revision: https://reviews.llvm.org/D124225 - -commit acd75440c67acc31d9a5b0afdd64b5700be8960e -Author: Kiran Chandramohan -Date: Wed Apr 27 12:19:54 2022 +0000 - - [Flang] Lower the FailImage Statement - - Lowering of FailImage statement generates a runtime call and the - unreachable operation. The unreachable operation cannot terminate - a structured operation like the IF operation, hence mark as - unstructured. - - Note: This patch is part of upstreaming code from the fir-dev branch of - https://github.com/flang-compiler/f18-llvm-project. - - Reviewed By: clementval - - Differential Revision: https://reviews.llvm.org/D124520 - - Co-authored-by: Eric Schweitz - -commit 58ceae9561fbae75fa175244db4e0e4bfe7416fd -Author: River Riddle -Date: Mon Apr 18 11:53:47 2022 -0700 - - [mlir:NFC] Remove the forward declaration of FuncOp in the mlir namespace - - FuncOp has been moved to the `func` namespace for a little over a month, the - using directive can be dropped now. 
- -commit 07e16a2aae68a02629cbcb34a0c0b12cb84754f8 -Author: Kiran Chandramohan -Date: Mon Apr 11 09:05:00 2022 +0000 - - [Flang][OpenMP] Add implementation of privatisation - - Privatisation creates local copies of variables in the OpenMP region. - Two functions `createHostAssociateVarClone` and `copyHostAssociateVar` - are added to create a clone of the variable for basic privatisation and to - copy the contents for first-privatisation. - - Note: Tests for more data-types will be added when the fir.do_loop is - upstreamed. - - This is part of the upstreaming effort from the fir-dev branch in [1]. - [1] https://github.com/flang-compiler/f18-llvm-project - - Reviewed By: peixin, NimishMishra - - Differential Revision: https://reviews.llvm.org/D122595 - - Co-authored-by: Jean Perier - Co-authored-by: Eric Schweitz - Co-authored-by: Peter Klausler - Co-authored-by: Valentin Clement - Co-authored-by: Sourabh Singh Tomar - Co-authored-by: Nimish Mishra - Co-authored-by: Peixin-Qiao - -commit 534b228313409a781060856d6cb6c2e9523cba5b -Author: Valentin Clement -Date: Mon Mar 28 13:36:10 2022 +0200 - - [flang] Lower some coarray statements to their runtime functions - - This patch adds the lowering of coarray statements to the runtime - functions. The runtime functions are currently not implemented. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D122466 - -commit 12d22cea73534ecf5d2d8c09181807e883523ac3 -Author: Valentin Clement -Date: Thu Mar 24 15:00:52 2022 +0100 - - [flang][OpenACC] Lower enter data directive - - This patch adds lowering for the `!$acc enter data` directive - from the PFT to OpenACC dialect. - - This patch is part of the upstreaming effort from fir-dev branch. 
- - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D122384 - -commit 74f992929ecf660bb299e5767a1fba15e9181613 -Author: Valentin Clement -Date: Tue Mar 22 16:11:42 2022 +0100 - - [flang][NFC] Remove unused variable - - Fix for buildbot failure shown after fe252f8ed6369acdb13d4e290d3b9dfe2ec4eb8e - -commit fe252f8ed6369acdb13d4e290d3b9dfe2ec4eb8e -Author: Valentin Clement -Date: Tue Mar 22 15:40:32 2022 +0100 - - [flang] Lower boxed procedure - - In FIR, we want to wrap function pointers in a special box known as a - boxproc value. Fortran has a limited form of dynamic scoping - [https://tinyurl.com/2p8v2hw7] between "host procedures" and "internal - procedures". There are a number of implementations possible. - - Boxproc typed values abstract away the implementation details of when a - function pointer can be passed directly (as a raw address) and when a - function pointer has to account for the presence of a dynamic scope. - When lowering Fortran syntax to FIR, all function pointers are emboxed - as boxproc values. - - When creating LLVM IR, we must strip away the abstraction and produce - low-level LLVM "assembly" code. This patch implements that - transformation as converting the boxproc values to either raw function - pointers or executable trampolines on the stack as needed. The - trampoline then captures the dynamic scope context within an executable - thunk that can be passed instead of the function's raw address. - - Some extra handling is required for Fortran functions that return a - character value to deal with LEN values here. - - Some of the code in Bridge.cpp and ConvertExpr.cpp and be re-arranged to - faciliate the upstreaming effort. - - This patch is part of the upstreaming effort from fir-dev branch. 
- - Reviewed By: jeanPerier, PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D122223 - - Co-authored-by: mleair - Co-authored-by: Jean Perier - Co-authored-by: Eric Schweitz - Co-authored-by: V Donaldson - Co-authored-by: Kiran Chandramohan - -commit 308fc3f27797ce2b0dc01970d6fe2c6c9e1f55c7 -Author: Valentin Clement -Date: Fri Mar 18 15:39:57 2022 +0100 - - [flang] Lower select case statement - - This patch adds lowering for the `select case` - statement. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D122007 - - Co-authored-by: Jean Perier - Co-authored-by: Eric Schweitz - Co-authored-by: V Donaldson - -commit fb99266401960afd99845890720b4d9b3ecd63d2 -Author: Eric Schweitz -Date: Wed Mar 16 15:23:25 2022 -0700 - - [flang] Remove unused code and redundant assertion. - - Differential Revision: https://reviews.llvm.org/D121864 - -commit 9aeb7f035bdde83501e5eddd9e6ad175b8ed697f -Author: Valentin Clement -Date: Wed Mar 16 17:10:31 2022 +0100 - - [flang] Lower IO input with vector subscripts - - This patch adds lowering for IO input with vector subscripts. - It defines a VectorSubscriptBox class that allow representing and working - with a lowered Designator containing vector subscripts while ensuring - all the subscripts expression are only lowered once. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D121806 - - Co-authored-by: Jean Perier - Co-authored-by: Eric Schweitz - -commit 94a11063573b4e1a3405d4a73d9928083115a6c1 -Author: Valentin Clement -Date: Tue Mar 15 22:18:45 2022 +0100 - - [flang] Lower min|max intrinsics - - This patch adds lowering for the following intrinsics: - - `max` - - `maxloc` - - `maxval` - - `minloc` - - `minval` - - This patch is part of the upstreaming effort from fir-dev branch. 
- - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D121701 - - Co-authored-by: Jean Perier - Co-authored-by: Eric Schweitz - Co-authored-by: mleair - -commit 8b5035333518c0363e4779dc1df855f06d3499ba -Author: Valentin Clement -Date: Tue Mar 15 22:03:14 2022 +0100 - - [flang] Lower alternate return - - This patch adds the lowering infrastructure for the lowering of - alternat returns. - - This patch is part of the upstreaming effort from fir-dev branch. - - Depends on D121698 - - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D121699 - - Co-authored-by: V Donaldson - Co-authored-by: Jean Perier - Co-authored-by: Eric Schweitz - -commit 76134f4138fcd1ef4fec989db8c050e793ee187d -Author: Valentin Clement -Date: Tue Mar 15 22:01:34 2022 +0100 - - [flang] Lower entry statement - - This patch add the lowering for the entry statement. - - This patch is part of the upstreaming effort from fir-dev branch. - - Depends on D121697 - - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D121698 - - Co-authored-by: V Donaldson - Co-authored-by: Jean Perier - Co-authored-by: Eric Schweitz - -commit a1425019e7207e8dc53e627aacfd547415a10b35 -Author: Valentin Clement -Date: Tue Mar 15 21:57:30 2022 +0100 - - [flang] Lower more pointer assignments/disassociation cases - - This patch lowers more cases of pointer assignments and - disassociations. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: PeteSteinfeld, schweitz - - Differential Revision: https://reviews.llvm.org/D121697 - - Co-authored-by: V Donaldson - Co-authored-by: Jean Perier - Co-authored-by: mleair - Co-authored-by: Eric Schweitz - -commit d8222d91c6f41725aa7669bea24932e072bc2767 -Author: Valentin Clement -Date: Mon Mar 14 18:15:16 2022 +0100 - - [flang] Lower format statement - - This patch lowers the format statement. 
- - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D121611 - -commit 72276bdaff931910f62a84336b3e864ab48bac06 -Author: Valentin Clement -Date: Thu Mar 10 20:19:57 2022 +0100 - - [flang] Lower pointer component in derived type - - This patch lowers pointer component part of derived types to - FIR. - - This patch is part of the upstreaming effort from fir-dev branch. - - Depends on D121383 - - Reviewed By: PeteSteinfeld, schweitz - - Differential Revision: https://reviews.llvm.org/D121384 - - Co-authored-by: V Donaldson - Co-authored-by: Jean Perier - Co-authored-by: Eric Schweitz - -commit 88ae0d61c31674bd75144c246ae25b55ecc5bff9 -Author: Valentin Clement -Date: Thu Mar 10 19:43:11 2022 +0100 - - [flang] Lower general forall statement - - This patch lowers general forall statements. The forall - are lowered to nested loops. - - This patch is part of the upstreaming effort from fir-dev branch. - - Depends on D121385 - - Reviewed By: PeteSteinfeld, schweitz - - Differential Revision: https://reviews.llvm.org/D121386 - - Co-authored-by: V Donaldson - Co-authored-by: Jean Perier - Co-authored-by: Eric Schweitz - -commit 7a6a1655d83b3ff79d120e399d8b9cc7ad2b143c -Author: Valentin Clement -Date: Thu Mar 10 18:43:40 2022 +0100 - - [flang] Lower where statement - - This patch lowers where statement to FIR. - The where statement is lowered to a conbination of - loops and if conditions. - - This patch is part of the upstreaming effort from fir-dev branch. 
- - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D121385 - - Co-authored-by: Jean Perier - Co-authored-by: Eric Schweitz - -commit e0f549a43ae02e65fce6c9c7a567fe3dc27bec9b -Author: Shraiysh Vaishay -Date: Thu Mar 10 22:40:23 2022 +0530 - - [flang] Added basic connect to lower OpenMP constructs - - Reviewed By: clementval - - Differential Revision: https://reviews.llvm.org/D121382 - -commit 589d51ea9f1a469cef2aae306859afaf6d7d5885 -Author: Valentin Clement -Date: Thu Mar 10 18:06:20 2022 +0100 - - [flang] Lower basic derived types - - This patch lowers basic derived type to FIR. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D121383 - - Co-authored-by: V Donaldson - Co-authored-by: Jean Perier - Co-authored-by: Eric Schweitz - -commit 092601d4baab7c13c06b31eda2d5bed91d9a6b65 -Author: Andrzej Warzynski -Date: Thu Mar 3 13:25:09 2022 +0000 - - [flang] Remove 'using namespace mlir;` from header files - - Currently, CGOps.h and FIROps.h contain `using namespace mlir;`. Every - file that includes one of these header files (directly and transitively) - will have the MLIR namespace enabled. With name-clashes within - sub-projects (LLVM and MLIR, MLIR and Flang), this is not desired. Also, - it is not possible to "un-use" a namespace once it is "used". Instead, - we should try to limit `using namespace` to implementation files (i.e. - *.cpp). - - This patch removes `using namespace mlir;` from header files and adjusts - other files accordingly. In header and TableGen files, extra namespace - qualifier is added when referring to symbols defined in MLIR. Similar - approach is adopted in source files that didn't require many changes. In - files that would require a lot of changes, `using namespace mlir;` is - added instead. 
- - Differential Revision: https://reviews.llvm.org/D120897 - -commit a49bf0ac381c456cdc3dcdf36378a361634f1123 -Author: Valentin Clement -Date: Tue Mar 8 22:08:02 2022 +0100 - - [flang] Lower associate construct - - This patch lowers the `associate` construct. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D121239 - - Co-authored-by: V Donaldson - Co-authored-by: Jean Perier - Co-authored-by: Eric Schweitz - -commit 78a127a3ef066451d1a77f452937cecfe25da64b -Author: Valentin Clement -Date: Tue Mar 8 20:17:48 2022 +0100 - - [flang] Lower computed and assigned goto - - This patch lowers the computed and assigned goto statements. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: PeteSteinfeld, schweitz - - Differential Revision: https://reviews.llvm.org/D121219 - - Co-authored-by: V Donaldson - Co-authored-by: Jean Perier - -commit b3eb0e113e5f12f4fc88bda8bf5a653b00425f2b -Author: Valentin Clement -Date: Tue Mar 8 18:47:28 2022 +0100 - - [flang] Lower sum intrinsic - - This patch enables the lowering of the `sum` intrinsic. It adds - also infrastructure to deal with optional arguments in intrinsics and - implied loops. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D121221 - - Co-authored-by: Eric Schweitz - Co-authored-by: Jean Perier - Co-authored-by: mleair - -commit c5cf1b903409e491d7599809dc18187363d7be21 -Author: Valentin Clement -Date: Mon Mar 7 21:22:28 2022 +0100 - - [flang] Lower allocate and deallocate statements - - This patch add the lowering for the allocate - and the deallocate statements. - - This patch is part of the upstreaming effort from fir-dev branch. 
- - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D121146 - - Co-authored-by: Eric Schweitz - Co-authored-by: Jean Perier - Co-authored-by: V Donaldson - -commit 764f95a8c78c33296aaee4a9ae8f1fba341a595b -Author: Valentin Clement -Date: Mon Mar 7 19:55:48 2022 +0100 - - [flang] Add lowering for host association - - This patches adds the code to handle host association for - inner subroutines and functions. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D121134 - - Co-authored-by: Eric Schweitz - Co-authored-by: Jean Perier - Co-authored-by: V Donaldson - -commit 17d71347b2ede797346b0a499665cf51e593b150 -Author: Valentin Clement -Date: Wed Mar 2 18:26:13 2022 +0100 - - [flang] Handle module in lowering pass - - This patch enables the lowering of basic modules and functions/subroutines - in modules. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D120819 - - Co-authored-by: Eric Schweitz - Co-authored-by: Jean Perier - -commit 7e32cada0105ec8756ce09a9fc07e2b10803d620 -Author: Valentin Clement -Date: Wed Mar 2 18:02:41 2022 +0100 - - [flang] Lower inquire statement - - This patch adds the lowering of the `inquire` statement. - - This patch is part of the upstreaming effort from fir-dev branch. - - Depends on D120822 - - Reviewed By: schweitz - - Differential Revision: https://reviews.llvm.org/D120823 - - Co-authored-by: Jean Perier - -commit 46f46a3763c494054227b2b0f551fc34fa9af367 -Author: Valentin Clement -Date: Wed Mar 2 17:58:38 2022 +0100 - - [flang] Lower basic IO file statements - - This patches adds lowering for couple of basic io statements such as `flush`, - `endfile`, `backspace` and `rewind` - - This patch is part of the upstreaming effort from fir-dev branch. 
- - Depends on D120821 - - Reviewed By: schweitz - - Differential Revision: https://reviews.llvm.org/D120822 - - Co-authored-by: Eric Schweitz - Co-authored-by: Jean Perier - -commit db48f7b2f7b3a3a62822bd41de84b9ccb6093b5f -Author: Valentin Clement -Date: Wed Mar 2 17:55:10 2022 +0100 - - [flang] Lower IO open and close statements - - This patch adds the lowering of open and close statements - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: schweitz - - Differential Revision: https://reviews.llvm.org/D120821 - - Co-authored-by: Jean Perier - -commit d88dfd2b311d5f7f8ab9faa0edfd380c1fd2d2b2 -Author: Valentin Clement -Date: Tue Mar 1 22:28:16 2022 +0100 - - [flang] Handle dynamic array lowering - - This patch enables dynamic array lowering - and use the funcationality inside some IO tests. - - This patch is part of the upstreaming effort from fir-dev branch. - - Depends on D120743 - - Reviewed By: PeteSteinfeld, schweitz - - Differential Revision: https://reviews.llvm.org/D120744 - - Co-authored-by: Eric Schweitz - Co-authored-by: Jean Perier - Co-authored-by: V Donaldson - -commit 8c22cb846f31c42ce1d19370025ad05c4db56256 -Author: Valentin Clement -Date: Tue Mar 1 21:47:40 2022 +0100 - - [flang] Lower basic IO statement - - This patch enables the lowering of the print, read and write - IO statements. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: PeteSteinfeld, schweitz - - Differential Revision: https://reviews.llvm.org/D120743 - - Co-authored-by: Eric Schweitz - Co-authored-by: Jean Perier - Co-authored-by: V Donaldson - Co-authored-by: Kiran Chandramohan - -commit 23aa5a744666b281af807b1f598f517bf0d597cb -Author: River Riddle -Date: Sat Feb 26 14:49:54 2022 -0800 - - [mlir] Rename the Standard dialect to the Func dialect - - The last remaining operations in the standard dialect all revolve around - FuncOp/function related constructs. 
This patch simply handles the initial - renaming (which by itself is already huge), but there are a large number - of cleanups unlocked/necessary afterwards: - - * Removing a bunch of unnecessary dependencies on Func - * Cleaning up the From/ToStandard conversion passes - * Preparing for the move of FuncOp to the Func dialect - - See the discussion at https://discourse.llvm.org/t/standard-dialect-the-final-chapter/6061 - - Differential Revision: https://reviews.llvm.org/D120624 - -commit 37e84d9be06d52abeb387acdfb6dacb274c63da5 -Author: Valentin Clement -Date: Fri Feb 25 18:21:44 2022 +0100 - - [flang] Lower simple character return - - Handles function with character return. - - Character scalar results are passed as arguments in lowering so - that an assumed length character function callee can access the result - length. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: PeteSteinfeld, schweitz - - Differential Revision: https://reviews.llvm.org/D120558 - - Co-authored-by: Eric Schweitz - Co-authored-by: Jean Perier - Co-authored-by: V Donaldson - -commit f9704f0cfb7a9edb86c0755bafef54cbd365743d -Author: Valentin Clement -Date: Thu Feb 24 21:09:40 2022 +0100 - - [flang] Simple array assignment lowering - - This patch handles lowering of simple array assignment. - - ``` - a(:) = 10 - ``` - - or - - ``` - a(1) = 1 - ``` - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: PeteSteinfeld, schweitz - - Differential Revision: https://reviews.llvm.org/D120501 - - Co-authored-by: Jean Perier - Co-authored-by: V Donaldson - Co-authored-by: Eric Schweitz - -commit 2a59ead118065012446bdbd0a31dc52799212f87 -Author: Valentin Clement -Date: Thu Feb 24 18:11:41 2022 +0100 - - [flang] Lower allocatable assignment for scalar - - Add lowering for simple assignement on allocatable - scalars. - - This patch is part of the upstreaming effort from fir-dev branch. 
- - Depends on D120483 - - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D120488 - - Co-authored-by: Eric Schweitz - Co-authored-by: Jean Perier - -commit d0b70a070aedc3665e352d06c7d996a4050f8fc8 -Author: Valentin Clement -Date: Wed Feb 23 19:48:07 2022 +0100 - - [flang] Lower function and subroutine calls - - This patch introduce basic function/subroutine calls. - Because of the state of lowering only simple scalar arguments - can be used in the calls. This will be enhanced in follow up - patches with arrays, allocatable, pointer ans so on. - - ``` - subroutine sub1() - end - - subroutine sub2() - call sub1() - end - ``` - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: schweitz - - Differential Revision: https://reviews.llvm.org/D120419 - - Co-authored-by: Eric Schweitz - Co-authored-by: Jean Perier - Co-authored-by: V Donaldson - -commit e641c29f41971597dbe190f98784f0e4cfc220cc -Author: Valentin Clement -Date: Thu Feb 17 18:23:22 2022 +0100 - - [flang] Lower simple scalar assignment - - This patch hanlde lowering of simple scalar assignment. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D120058 - - Co-authored-by: Jean Perier - -commit da7c77b82c217592cc14f5b5a3c6a9e6741896af -Author: Valentin Clement -Date: Wed Feb 16 20:27:23 2022 +0100 - - [flang] Handle lowering arguments in subroutine and function - - This patch adds infrsatrcutrue to be able to lower - arguments in functions and subroutines. - - This patch is part of the upstreaming effort from fir-dev branch. 
- - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D119957 - - Co-authored-by: Eric Schweitz - Co-authored-by: Jean Perier - -commit ad40cc14a8b728dedc20c9397489bda50185b176 -Author: Valentin Clement -Date: Mon Feb 14 21:31:46 2022 +0100 - - [flang] Lower basic function with scalar integer/logical return value - - This patch allows the lowring of simple empty function with a - scalar integer or logical return value. - The code in ConvertType.cpp is cleaned up as well. This file was landed - together with the initial flang push and lowering was still a prototype - at that time. Some more cleaning will come with follow up patches. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D119698 - - Co-authored-by: Jean Perier - -commit 85b89ed213c41a8d7dafff957c8d20a247e6d9df -Author: Valentin Clement -Date: Thu Feb 10 18:35:16 2022 +0100 - - [flang] Lower simple RETURN statement - - This patch adds the lowering for the RETURN statement - without alternate returns in the main program or in subroutine - and functions. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D119429 - - Co-authored-by: V Donaldson - Co-authored-by: Eric Schweitz - Co-authored-by: Jean Perier - -commit ae37bb9804c7b6ee7e6d1c070889c30f74be1001 -Author: Kiran Chandramohan -Date: Tue Feb 8 23:01:39 2022 +0000 - - [Flang] Add support for lowering the goto statement - - This patch adds support for lowering the Fortran goto statement from - parse-tree to MLIR. The goto statement in Fortran is a form of - unstructured control flow. The statement transfers control to the - code starting at the label specified in the statement. This can be - faithfully represented in MLIR by a branch instruction. 
- - To assist the lowering of code with unstructured control flow, blocks - are created in advance and associated with the relevant pre-fir tree - evaluations. - - This is part of the upstreaming effort from the fir-dev branch in [1]. - - [1] https://github.com/flang-compiler/f18-llvm-project - - Reviewed By: clementval, vdonaldson, schweitz, awarzynski - - Differential Revision: https://reviews.llvm.org/D118983 - - Co-authored-by: V Donaldson - Co-authored-by: Jean Perier - Co-authored-by: Eric Schweitz - -commit 2c2e5a5d0f191027ab447899599baeaf744fc2eb -Author: Valentin Clement -Date: Mon Feb 7 09:12:17 2022 +0100 - - [flang] Basic local variable lowering - - This patch add lowering for simple local variable. - - - The signatures in `ConvertType.h` have been simplified to take advantage of the `AbstractConverter`. - - The lowering make use of the `allocateLocal` from the `FirOpBuilder`. - - This lowering is used in patch D118982 - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: kiranchandramohan, jeanPerier, schweitz - - Differential Revision: https://reviews.llvm.org/D118978 - -commit dc6a344637a6417aedf85e5fd1236e7fcd43c3f1 -Author: Valentin Clement -Date: Thu Feb 3 10:40:19 2022 +0100 - - [flang] Add lowering for integer constant - - This patch enables the lowering of integer constant. - - The `ScalarExprLowering` class is introduced in `ConvertExpr.cpp` to help - the lowering of expression. This patch adds all the placeholder as well for future - expression lowering with the appropriate TODOs. - - Integer constant expression are lowered to `arith.constant` with an integer type corresponding to the kind value. - - This patch is in support of D118787 - - This patch is part of the upstreaming effort from fir-dev branch. 
- - Reviewed By: schweitz, kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D118786 - -commit 415267407db27a6ca9818f1d269314fe92dd8042 -Author: Valentin Clement -Date: Wed Feb 2 18:44:09 2022 +0100 - - [flang] Switch return to ExtendedValue in AbstractConverter and Bridge - - Change the signature of `genExprAddr`, `genExprValue` to return a `fir::ExtendedValue` instead of a simple `mlir::Value` - - This patch is a preparation for more lowering to be upstream. It supports D118786 and D118787. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D118785 - -commit db01b123d012df2f0e6acf7e90bf4ba63382587c -Author: Valentin Clement -Date: Wed Feb 2 08:15:26 2022 +0100 - - [flang] Lower PAUSE statement - - Lower the PAUSE statement to a runtime call. - - This patch is part of the upstreaming effort from fir-dev branch. - - Reviewed By: kiranchandramohan, schweitz - - Differential Revision: https://reviews.llvm.org/D118699 - - Co-authored-by: Eric Schweitz - Co-authored-by: Jean Perier - -commit aab4263ad66c99da15cb9bda6aec8269d159b113 -Author: Valentin Clement -Date: Tue Feb 1 20:53:00 2022 +0100 - - [flang] Lower basic STOP statement - - This patch lowers STOP statement without arguments - and ERROR STOP. STOP statement with arguments lowering will - come in later patches ince it requires some expression lowering - to be added. - STOP statement is lowered to a runtime call. - - Also makes sure we are creating a constant in the MLIR arith constant. - - This patch is part of the upstreaming effort from fir-dev branch. 
- - Reviewed By: kiranchandramohan, schweitz - - Differential Revision: https://reviews.llvm.org/D118697 - - Co-authored-by: Eric Schweitz - -commit 89275300d861aef73225428c95fdb069de36186d -Author: Valentin Clement -Date: Tue Feb 1 15:26:47 2022 +0100 - - [flang] Add lowering for basic empty SUBROUTINE - - This patch adds the ability to lower an empty subroutine. - - Reviewed By: kiranchandramohan - - Differential Revision: https://reviews.llvm.org/D118695 - -commit 990759136a268fa51695d04f845ce7f9b36a842f -Author: Valentin Clement -Date: Tue Feb 1 13:49:49 2022 +0100 - - [flang] Add lowering placeholders - - This patch puts in place the differents - function to lower the evaluation list. All functions - are just placholders with TODOs for now. - Follow up patches will bring the proper lowering in these - functions. - - Reviewed By: jeanPerier - - Differential Revision: https://reviews.llvm.org/D118678 - -commit e1a12767ee628e179efc8733449f98018a686b4d -Author: Valentin Clement -Date: Fri Jan 28 22:39:44 2022 +0100 - - [flang] Initial lowering for empty program - - This patch enable lowering from Fortran to FIR for a basic empty - program. It brings all the infrastructure needed for that. As discussed - previously, this is the first patch for lowering and follow up patches - should be smaller. - - With this patch we can lower the following code: - - ``` - program basic - end program - ``` - - To a the FIR equivalent: - - ``` - func @_QQmain() { - return - } - ``` - - Follow up patch will add lowering of more complex constructs. 
- - Reviewed By: kiranchandramohan, schweitz, PeteSteinfeld - - Differential Revision: https://reviews.llvm.org/D118436 - -commit 1f8790050b0e99e7b46cc69518aa84f46f50738e -Author: Tim Keith -Date: Sat Mar 28 21:00:16 2020 -0700 - - [flang] Reformat with latest clang-format and .clang-format - - Original-commit: flang-compiler/f18@9fe84f45d7fd685051004678d6b5775dcc4c6f8f - Reviewed-on: https://github.com/flang-compiler/f18/pull/1094 - -commit 64ab3302d5a130c00b66a6957b2e7f0c9b9c537d -Author: CarolineConcatto <51754594+CarolineConcatto@users.noreply.github.com> -Date: Tue Feb 25 15:11:52 2020 +0000 - - [flang] [LLVMify F18] Compiler module folders should have capitalised names (flang-compiler/f18#980) - - This patch renames the modules in f18 to use a capital letter in the - module name - - Signed-off-by: Caroline Concatto - - Original-commit: flang-compiler/f18@d2eb7a1c443d1539ef12b6f027074a0eb15b1ea0 - Reviewed-on: https://github.com/flang-compiler/f18/pull/980