Outline globals properly for GPU-parallel loops

pull/78/head
A. R. Shajii 2022-09-30 10:43:58 -04:00
parent 0622bb42e1
commit a9fdefb5df
3 changed files with 26 additions and 18 deletions

View File

@@ -1233,7 +1233,7 @@ template <typename T> OpenMPTransformData unpar(T *v) {
}
template <typename T>
OpenMPTransformData setupOpenMPTransform(T *v, BodiedFunc *parent) {
OpenMPTransformData setupOpenMPTransform(T *v, BodiedFunc *parent, bool gpu) {
if (!v->isParallel())
return unpar(v);
auto *M = v->getModule();
@@ -1241,7 +1241,7 @@ OpenMPTransformData setupOpenMPTransform(T *v, BodiedFunc *parent) {
if (!parent || !body)
return unpar(v);
auto outline = util::outlineRegion(parent, body, /*allowOutflows=*/false,
/*outlineGlobals=*/true);
/*outlineGlobals=*/true, /*allByValue=*/gpu);
if (!outline)
return unpar(v);
@@ -1400,7 +1400,7 @@ CollapseResult collapseLoop(BodiedFunc *parent, ImperativeForFlow *v, int64_t le
const std::string OpenMPPass::KEY = "core-parallel-openmp";
void OpenMPPass::handle(ForFlow *v) {
auto data = setupOpenMPTransform(v, cast<BodiedFunc>(getParentFunc()));
auto data = setupOpenMPTransform(v, cast<BodiedFunc>(getParentFunc()), /*gpu=*/false);
if (!v->isParallel())
return;
@@ -1516,7 +1516,8 @@ void OpenMPPass::handle(ImperativeForFlow *v) {
}
}
auto data = setupOpenMPTransform(v, parent);
auto data =
setupOpenMPTransform(v, parent, (v->isParallel() && v->getSchedule()->gpu));
if (!v->isParallel())
return;
@@ -1529,11 +1530,8 @@ void OpenMPPass::handle(ImperativeForFlow *v) {
auto *sched = v->getSchedule();
OMPTypes types(M);
if (sched->gpu && !sharedVars.empty()) {
warn("GPU-parallel loop cannot modify external variables; ignoring", v);
v->setParallel(false);
return;
}
// we disable shared vars for GPU loops
seqassertn(!(sched->gpu && !sharedVars.empty()), "GPU-parallel loop had shared vars");
// gather extra arguments
std::vector<Value *> extraArgs;

View File

@@ -102,6 +102,7 @@ struct Outliner : public Operator {
SeriesFlow *flowRegion;
decltype(flowRegion->begin()) begin, end;
bool outlineGlobals; // whether to outline globals that are modified
bool allByValue; // outline all vars by value (can change semantics)
bool inRegion; // are we in the outlined region?
bool invalid; // if we can't outline for whatever reason
std::unordered_set<id_t> inVars; // vars used inside region
@@ -115,10 +116,11 @@ struct Outliner : public Operator {
Outliner(BodiedFunc *parent, SeriesFlow *flowRegion,
decltype(flowRegion->begin()) begin, decltype(flowRegion->begin()) end,
bool outlineGlobals)
bool outlineGlobals, bool allByValue)
: Operator(), parent(parent), flowRegion(flowRegion), begin(begin), end(end),
outlineGlobals(outlineGlobals), inRegion(false), invalid(false), inVars(),
outVars(), modifiedInVars(), globalsToOutline(), inLoops(), outFlows() {}
outlineGlobals(outlineGlobals), allByValue(allByValue), inRegion(false),
invalid(false), inVars(), outVars(), modifiedInVars(), globalsToOutline(),
inLoops(), outFlows() {}
bool isEnclosingLoopInRegion(id_t loopId = -1) {
int d = depth();
@@ -235,6 +237,8 @@ struct Outliner : public Operator {
for (auto *var : vars) {
if (!var->isGlobal())
set.insert(var->getId());
else if (inRegion && allByValue && !isA<Func>(var))
globalsToOutline.insert(var->getId());
}
}
@@ -260,6 +264,9 @@ struct Outliner : public Operator {
// mod = shared AND modified in region
std::unordered_set<id_t> getModVars() {
if (allByValue)
return {};
std::unordered_set<id_t> modVars, shared = getSharedVars();
for (auto id : modifiedInVars) {
if (globalsToOutline.count(id) > 0 || shared.count(id) > 0)
@@ -377,18 +384,18 @@ struct Outliner : public Operator {
OutlineResult outlineRegion(BodiedFunc *parent, SeriesFlow *series,
decltype(series->begin()) begin,
decltype(series->end()) end, bool allowOutflows,
bool outlineGlobals) {
bool outlineGlobals, bool allByValue) {
if (begin == end)
return {};
Outliner outliner(parent, series, begin, end, outlineGlobals);
Outliner outliner(parent, series, begin, end, outlineGlobals, allByValue);
parent->accept(outliner);
return outliner.outline(allowOutflows);
}
OutlineResult outlineRegion(BodiedFunc *parent, SeriesFlow *series, bool allowOutflows,
bool outlineGlobals) {
bool outlineGlobals, bool allByValue) {
return outlineRegion(parent, series, series->begin(), series->end(), allowOutflows,
outlineGlobals);
outlineGlobals, allByValue);
}
} // namespace util

View File

@@ -51,11 +51,12 @@ struct OutlineResult {
/// @param end end of outlining (non-inclusive like standard iterators)
/// @param allowOutflows allow outlining regions with "out-flows"
/// @param outlineGlobals outline globals as arguments to outlined function
/// @param allByValue pass all outlined vars by value (can change semantics)
/// @return the result of outlining
OutlineResult outlineRegion(BodiedFunc *parent, SeriesFlow *series,
decltype(series->begin()) begin,
decltype(series->end()) end, bool allowOutflows = true,
bool outlineGlobals = false);
bool outlineGlobals = false, bool allByValue = false);
/// Outlines a series flow from its parent function. The outlined code
/// will be replaced by a call to the outlined function, and possibly
@@ -64,9 +65,11 @@ OutlineResult outlineRegion(BodiedFunc *parent, SeriesFlow *series,
/// @param series the series flow on which outlining will happen
/// @param allowOutflows allow outlining regions with "out-flows"
/// @param outlineGlobals outline globals as arguments to outlined function
/// @param allByValue pass all outlined vars by value (can change semantics)
/// @return the result of outlining
OutlineResult outlineRegion(BodiedFunc *parent, SeriesFlow *series,
bool allowOutflows = true, bool outlineGlobals = false);
bool allowOutflows = true, bool outlineGlobals = false,
bool allByValue = false);
} // namespace util
} // namespace ir