diff --git a/codon/sir/transform/parallel/openmp.cpp b/codon/sir/transform/parallel/openmp.cpp index 0a4b397b..71dbef3a 100644 --- a/codon/sir/transform/parallel/openmp.cpp +++ b/codon/sir/transform/parallel/openmp.cpp @@ -1233,7 +1233,7 @@ template OpenMPTransformData unpar(T *v) { } template -OpenMPTransformData setupOpenMPTransform(T *v, BodiedFunc *parent) { +OpenMPTransformData setupOpenMPTransform(T *v, BodiedFunc *parent, bool gpu) { if (!v->isParallel()) return unpar(v); auto *M = v->getModule(); @@ -1241,7 +1241,7 @@ OpenMPTransformData setupOpenMPTransform(T *v, BodiedFunc *parent) { if (!parent || !body) return unpar(v); auto outline = util::outlineRegion(parent, body, /*allowOutflows=*/false, - /*outlineGlobals=*/true); + /*outlineGlobals=*/true, /*allByValue=*/gpu); if (!outline) return unpar(v); @@ -1400,7 +1400,7 @@ CollapseResult collapseLoop(BodiedFunc *parent, ImperativeForFlow *v, int64_t le const std::string OpenMPPass::KEY = "core-parallel-openmp"; void OpenMPPass::handle(ForFlow *v) { - auto data = setupOpenMPTransform(v, cast(getParentFunc())); + auto data = setupOpenMPTransform(v, cast(getParentFunc()), /*gpu=*/false); if (!v->isParallel()) return; @@ -1516,7 +1516,8 @@ void OpenMPPass::handle(ImperativeForFlow *v) { } } - auto data = setupOpenMPTransform(v, parent); + auto data = + setupOpenMPTransform(v, parent, (v->isParallel() && v->getSchedule()->gpu)); if (!v->isParallel()) return; @@ -1529,11 +1530,8 @@ void OpenMPPass::handle(ImperativeForFlow *v) { auto *sched = v->getSchedule(); OMPTypes types(M); - if (sched->gpu && !sharedVars.empty()) { - warn("GPU-parallel loop cannot modify external variables; ignoring", v); - v->setParallel(false); - return; - } + // we disable shared vars for GPU loops + seqassertn(!(sched->gpu && !sharedVars.empty()), "GPU-parallel loop had shared vars"); // gather extra arguments std::vector extraArgs; diff --git a/codon/sir/util/outlining.cpp b/codon/sir/util/outlining.cpp index fcd8b40c..f04613cc 100644 --- a/codon/sir/util/outlining.cpp +++ b/codon/sir/util/outlining.cpp @@ -102,6 +102,7 @@ struct Outliner : public Operator { SeriesFlow *flowRegion; decltype(flowRegion->begin()) begin, end; bool outlineGlobals; // whether to outline globals that are modified + bool allByValue; // outline all vars by value (can change semantics) bool inRegion; // are we in the outlined region? bool invalid; // if we can't outline for whatever reason std::unordered_set inVars; // vars used inside region @@ -115,10 +116,11 @@ struct Outliner : public Operator { Outliner(BodiedFunc *parent, SeriesFlow *flowRegion, decltype(flowRegion->begin()) begin, decltype(flowRegion->begin()) end, - bool outlineGlobals) + bool outlineGlobals, bool allByValue) : Operator(), parent(parent), flowRegion(flowRegion), begin(begin), end(end), - outlineGlobals(outlineGlobals), inRegion(false), invalid(false), inVars(), - outVars(), modifiedInVars(), globalsToOutline(), inLoops(), outFlows() {} + outlineGlobals(outlineGlobals), allByValue(allByValue), inRegion(false), + invalid(false), inVars(), outVars(), modifiedInVars(), globalsToOutline(), + inLoops(), outFlows() {} bool isEnclosingLoopInRegion(id_t loopId = -1) { int d = depth(); @@ -235,6 +237,8 @@ struct Outliner : public Operator { for (auto *var : vars) { if (!var->isGlobal()) set.insert(var->getId()); + else if (inRegion && allByValue && !isA(var)) + globalsToOutline.insert(var->getId()); } } @@ -260,6 +264,9 @@ struct Outliner : public Operator { // mod = shared AND modified in region std::unordered_set getModVars() { + if (allByValue) + return {}; + std::unordered_set modVars, shared = getSharedVars(); for (auto id : modifiedInVars) { if (globalsToOutline.count(id) > 0 || shared.count(id) > 0) @@ -377,18 +384,18 @@ struct Outliner : public Operator { OutlineResult outlineRegion(BodiedFunc *parent, SeriesFlow *series, decltype(series->begin()) begin, decltype(series->end()) end, bool allowOutflows, - bool outlineGlobals) { + bool outlineGlobals, bool allByValue) { if (begin == end) return {}; - Outliner outliner(parent, series, begin, end, outlineGlobals); + Outliner outliner(parent, series, begin, end, outlineGlobals, allByValue); parent->accept(outliner); return outliner.outline(allowOutflows); } OutlineResult outlineRegion(BodiedFunc *parent, SeriesFlow *series, bool allowOutflows, - bool outlineGlobals) { + bool outlineGlobals, bool allByValue) { return outlineRegion(parent, series, series->begin(), series->end(), allowOutflows, - outlineGlobals); + outlineGlobals, allByValue); } } // namespace util diff --git a/codon/sir/util/outlining.h b/codon/sir/util/outlining.h index c711221d..0eb8a9a1 100644 --- a/codon/sir/util/outlining.h +++ b/codon/sir/util/outlining.h @@ -51,11 +51,12 @@ struct OutlineResult { /// @param end end of outlining (non-inclusive like standard iterators) /// @param allowOutflows allow outlining regions with "out-flows" /// @param outlineGlobals outline globals as arguments to outlined function +/// @param allByValue pass all outlined vars by value (can change semantics) /// @return the result of outlining OutlineResult outlineRegion(BodiedFunc *parent, SeriesFlow *series, decltype(series->begin()) begin, decltype(series->end()) end, bool allowOutflows = true, - bool outlineGlobals = false); + bool outlineGlobals = false, bool allByValue = false); /// Outlines a series flow from its parent function. The outlined code /// will be replaced by a call to the outlined function, and possibly @@ -64,9 +65,11 @@ OutlineResult outlineRegion(BodiedFunc *parent, SeriesFlow *series, /// @param series the series flow on which outlining will happen /// @param allowOutflows allow outlining regions with "out-flows" /// @param outlineGlobals outline globals as arguments to outlined function +/// @param allByValue pass all outlined vars by value (can change semantics) /// @return the result of outlining OutlineResult outlineRegion(BodiedFunc *parent, SeriesFlow *series, - bool allowOutflows = true, bool outlineGlobals = false); + bool allowOutflows = true, bool outlineGlobals = false, + bool allByValue = false); } // namespace util } // namespace ir