Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: Exo GPU code for discussion (spork) #736

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/spork/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
*.aux
*.log
*.out
*.pdf
57 changes: 57 additions & 0 deletions docs/spork/common.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
\documentclass[11pt]{article}

\usepackage[letterpaper, portrait, margin=2.54cm]{geometry}
\usepackage{enumitem}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage{placeins}
\usepackage{graphicx}
\usepackage{listings}
\usepackage{caption}
\usepackage{colortbl}
\usepackage[parfill]{parskip}
\usepackage[mathscr]{euscript}
\usepackage[usenames,dvipsnames,svgnames,table,hyperref]{xcolor}
\usepackage[hidelinks]{hyperref}
\usepackage{fontspec}

\setsansfont{FreeSans}
\setmonofont{Ubuntu Mono}

\hyphenation{WebGL}

%\definecolor{webColor}{RGB}{0, 124, 204}
\definecolor{webColor}{RGB}{0, 108, 174}
\newcommand{\web}[1]{{\color{webColor} \small \url{#1}}}
\newcommand{\webText}[2]{{\color{webColor} \href{#2}{#1}}}
\newcommand{\email}[2]{{\small \color{webColor} \textsf{\href{mailto:#1@#2}{#1[at]#2}}}}
\definecolor{titleColor}{RGB}{0, 0, 0}
\ifdefined\RESUME
\newcommand{\myTitle}[1]{{\large \color{titleColor} \hspace{-12mm} \textbf{\textsf{#1}}}}
\else
\newcommand{\myTitle}[1]{{ \vspace{2mm} \large \color{titleColor} \hspace{-12mm} \textbf{\textsf{#1}} \vspace{2mm}}}
\fi
%\definecolor{subColor}{RGB}{97, 153, 0}
\definecolor{subColor}{RGB}{170, 149, 0}
\newcommand{\mySub}[1]{{\color{subColor}\hspace{-9mm} \textsf{#1}}}
%\definecolor{keyColor}{RGB}{170, 149, 0}
\definecolor{keyColor}{RGB}{118, 185, 0}
\newcommand{\myKey}[1]{{\color{keyColor}\textbf{#1}}}

\definecolor{hookColor}{RGB}{204, 0, 255}
\newcommand{\hook}[1]{{\color{hookColor}\textbf{#1}}}
\definecolor{flaggedBoxColor}{RGB}{255, 204, 204}
\newcommand{\flagged}[1]{{\color{black}\colorbox{flaggedBoxColor}{#1}}}

\definecolor{lightttColor}{RGB}{69, 69, 80}
\newcommand{\lighttt}[1]{{\color{lightttColor}\texttt{#1}}}
\newcommand{\graytt}[1]{{\color{gray}\texttt{#1}}}

\renewcommand*{\theenumi}{\alph{enumi}}
\renewcommand*\labelenumi{(\theenumi)}
\renewcommand*{\theenumii}{\roman{enumii}}
\renewcommand*\labelenumii{\theenumii.}

\sloppy
\newcommand{\abs}[1]{\left \vert #1 \right\vert}
639 changes: 639 additions & 0 deletions docs/spork/spork.tex

Large diffs are not rendered by default.

33 changes: 33 additions & 0 deletions src/exo/API_cursors.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .core.LoopIR_pprint import _print_cursor
from .rewrite.LoopIR_scheduling import SchedulingError

from .spork.actor_kinds import actor_kind_dict, ActorKind

# --------------------------------------------------------------------------- #
# --------------------------------------------------------------------------- #
Expand Down Expand Up @@ -515,6 +516,36 @@ class PassCursor(StmtCursor):
"""


class SyncCursor(StmtCursor):
"""
Cursor pointing to a synchronization statement:
A // B
"""

def A(self):
"""Returns the "before" side of the barrier.

Return type is either str (name of a barrier variable) or an ActorKind"""
return self._get("A")

def B(self):
"""Returns the "after" side of the barrier.

Return type is either str (name of a barrier variable) or an ActorKind"""
return self._get("B")

def _get(self, name):
assert isinstance(self._impl, C.Node)
assert isinstance(self._impl._node, LoopIR.SyncStmt)
sym = getattr(self._impl._node, name)
val = actor_kind_dict.get(sym.name(), sym)
if isinstance(val, ActorKind):
assert val.sym == sym
else:
assert isinstance(val, str)
return val


class IfCursor(StmtCursor):
"""
Cursor pointing to an if statement:
Expand Down Expand Up @@ -900,6 +931,8 @@ def lift_cursor(impl, proc):
return AssignConfigCursor(impl, proc)
elif isinstance(n, LoopIR.Pass):
return PassCursor(impl, proc)
elif isinstance(n, LoopIR.SyncStmt):
return SyncCursor(impl, proc)
elif isinstance(n, LoopIR.If):
return IfCursor(impl, proc)
elif isinstance(n, LoopIR.For):
Expand Down
32 changes: 31 additions & 1 deletion src/exo/API_scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from .API import Procedure
import exo.API_cursors as PC
from .core.LoopIR import LoopIR, T
from .spork.actor_kinds import ActorKind
from .spork.loop_modes import LoopMode, seq, par, loop_mode_dict
import exo.rewrite.LoopIR_scheduling as scheduling
from .API_types import ExoType

Expand Down Expand Up @@ -349,6 +351,26 @@ def __call__(self, typ, all_args):
)


# Allow loop mode or actor kind (implicitly translated to async-for loop)
class LoopModeA(ArgumentProcessor):
def __call__(self, val, all_args):
if isinstance(val, ActorKind):
if val.is_synthetic():
self.err("cannot use synthetic ActorKind")
elif not val.is_async():
self.err("ActorKind must be async")
else:
val = loop_mode_dict.get(val.name)
if val:
val = val()
assert isinstance(
val, LoopMode
), "internal error, expected LoopMode instantiated for async ActorKind"
if not isinstance(val, LoopMode):
self.err("expected a LoopMode")
return val


# --------------------------------------------------------------------------- #
# --------------------------------------------------------------------------- #
# Cursor Argument Processing
Expand Down Expand Up @@ -897,7 +919,15 @@ def reorder_stmts(proc, block_cursor):
def parallelize_loop(proc, loop_cursor):
loop = loop_cursor._impl

ir, fwd = scheduling.DoParallelizeLoop(loop)
ir, fwd = scheduling.DoSetLoopMode(loop, par)
return Procedure(ir, _provenance_eq_Procedure=proc, _forward=fwd)


@sched_op([ForCursorA, LoopModeA])
def set_loop_mode(proc, loop_cursor, loop_mode):
loop = loop_cursor._impl

ir, fwd = scheduling.DoSetLoopMode(loop, loop_mode)
return Procedure(ir, _provenance_eq_Procedure=proc, _forward=fwd)


Expand Down
6 changes: 6 additions & 0 deletions src/exo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
from .core.extern import Extern

from . import stdlib
from .spork import actor_kinds
from .spork import loop_modes
from .spork import lane_units

__version__ = "0.2.1"

Expand All @@ -33,4 +36,7 @@
#
"stdlib",
"ExoType",
"actor_kinds",
"loop_modes",
"lane_units",
]
89 changes: 82 additions & 7 deletions src/exo/backend/LoopIR_compiler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import functools
import re
import textwrap
import warnings
from collections import ChainMap
from collections import defaultdict
from dataclasses import dataclass
Expand All @@ -15,6 +16,9 @@
from ..core.prelude import *
from .win_analysis import WindowAnalysis
from ..rewrite.range_analysis import IndexRangeEnvironment
from ..spork.loop_modes import LoopMode, Seq, Par
from ..spork.spork_env import SporkEnv
from ..spork import actor_kinds


def sanitize_str(s):
Expand Down Expand Up @@ -531,6 +535,9 @@ def __init__(self, proc, ctxt_name, *, is_public_decl):
self.range_env = IndexRangeEnvironment(proc, fast=False)
self.names = ChainMap()
self.envtyp = dict()
self.spork = (
None # Set to SporkEnv only when compiling GPU kernel code, else None
)
self.mems = dict()
self._tab = ""
self._lines = []
Expand Down Expand Up @@ -651,7 +658,10 @@ def is_leaf_proc(stmts):

def add_line(self, line):
if line:
self._lines.append(self._tab + line)
if self.spork:
self.spork.kernel_lines.append(self._tab + line)
else:
self._lines.append(self._tab + line)

def comp_stmts(self, stmts):
for b in stmts:
Expand Down Expand Up @@ -822,9 +832,18 @@ def get_window_type(self, typ, is_const=None):
self.window_defns.add(win)
return win.name

def get_actor_kind(self):
return self.spork.get_actor_kind() if self.spork else actor_kinds.cpu

def comp_s(self, s):
if self.spork:
self.spork.on_comp_s(s)

if isinstance(s, LoopIR.Pass):
self.add_line("; // NO-OP")
elif isinstance(s, LoopIR.SyncStmt):
warnings.warn("Not implemented: compiling LoopIR.SyncStmt")
self.add_line(f"// TODO LoopIR.SyncStmt {s.A} // {s.B}")
elif isinstance(s, (LoopIR.Assign, LoopIR.Reduce)):
if s.name in self._scalar_refs:
lhs = f"*{self.env[s.name]}"
Expand Down Expand Up @@ -877,7 +896,19 @@ def comp_s(self, s):
name = self.new_varname(s.name, typ=s.rhs.type, mem=mem)
self.add_line(f"struct {win_struct} {name} = {rhs};")
elif isinstance(s, LoopIR.If):
cond = self.comp_e(s.cond)
is_lane_specialization = (
isinstance(s.cond, LoopIR.Const)
and s.cond.type == T.lane_specialization
)
if is_lane_specialization:
if not self.spork:
raise ValueError(
f"{s.srcinfo}: lane specialization outside GPU scope"
)
cond = self.spork.push_lane_specialization(s.cond.val)
else:
cond = self.comp_e(s.cond)

self.add_line(f"if ({cond}) {{")
self.push()
self.comp_stmts(s.body)
Expand All @@ -889,23 +920,65 @@ def comp_s(self, s):
self.pop()
self.add_line("}")

if is_lane_specialization:
self.spork.pop_lane_specialization()

elif isinstance(s, LoopIR.For):
lo = self.comp_e(s.lo)
hi = self.comp_e(s.hi)
self.push(only="env")
itr = self.new_varname(s.iter, typ=T.index) # allocate a new string
self.range_env.add_loop_iter(
sym_range = self.range_env.add_loop_iter(
s.iter,
s.lo,
s.hi,
)
if isinstance(s.loop_mode, LoopIR.Par):

loop_mode = s.loop_mode
old_actor_kind = self.get_actor_kind()
new_actor_kind = loop_mode.new_actor_kind(old_actor_kind)
emit_loop = True
starting_cuda_kernel = (
not self.spork and new_actor_kind is not actor_kinds.cpu
)
if not new_actor_kind.allows_parent(old_actor_kind):
raise TypeError(
f"{s.srcinfo}: cannot nest loop with actor kind {new_actor_kind} in {old_actor_kind} scope"
)
if starting_cuda_kernel:
self.spork = SporkEnv(
f"{self.proc.name}EXOcu_{s.srcinfo.lineno:04d}{itr}"
)
if self.spork:
self.spork.push_actor_kind(new_actor_kind)
if loop_mode.is_par:
emit_loop = self.spork.push_parallel_for(s)

if isinstance(loop_mode, Par):
assert emit_loop
self.add_line(f"#pragma omp parallel for")
self.add_line(f"for (int_fast32_t {itr} = {lo}; {itr} < {hi}; {itr}++) {{")

if emit_loop:
self.add_line(
f"for (int_fast32_t {itr} = {lo}; {itr} < {hi}; {itr}++) {{"
)

self.push(only="tab")
self.comp_stmts(s.body)
self.pop()
self.add_line("}")

if self.spork:
self.spork.pop_actor_kind()
if loop_mode.is_par:
self.spork.pop_parallel_for()

if starting_cuda_kernel:
# TODO compile kernel_lines into separate device function
self._lines.extend(self.spork.kernel_lines)
self.spork = None

if emit_loop:
self.pop()
self.add_line("}")

elif isinstance(s, LoopIR.Alloc):
name = self.new_varname(s.name, typ=s.type, mem=s.mem)
Expand Down Expand Up @@ -1017,6 +1090,8 @@ def comp_e(self, e, prec=0):
return f"{float(e.val)}"
elif e.type == T.f32:
return f"{float(e.val)}f"
elif e.type == T.lane_specialization:
assert False, "should be handled when compiling LoopIR.If"
else:
return f"(({e.type.ctype()}) {str(e.val)})"

Expand Down
1 change: 1 addition & 0 deletions src/exo/backend/mem_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def mem_s(self, s):

if (
styp is LoopIR.Pass
or styp is LoopIR.SyncStmt
or styp is LoopIR.Assign
or styp is LoopIR.Reduce
or styp is LoopIR.WriteConfig
Expand Down
16 changes: 12 additions & 4 deletions src/exo/backend/parallel_analysis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from ..core.LoopIR import LoopIR, LoopIR_Rewrite
from ..spork.loop_modes import LoopMode, Seq, Par

from ..rewrite.new_eff import Check_ParallelizeLoop
from ..rewrite.new_eff import Check_ParallelizeLoop, SchedulingError

import warnings


class ParallelAnalysis(LoopIR_Rewrite):
Expand All @@ -20,10 +23,15 @@ def err(self, node, msg):
self._errors.append(f"{node.srcinfo}: {msg}")

def map_s(self, s):
if isinstance(s, LoopIR.For) and isinstance(s.loop_mode, LoopIR.Par):
if isinstance(s, LoopIR.For) and not isinstance(s.loop_mode, Seq):
try:
Check_ParallelizeLoop(self.proc, s)
except:
if isinstance(s.loop_mode, Par):
Check_ParallelizeLoop(self.proc, s)
else:
warnings.warn(
f"Not implemented: data race check for {type(s.loop_mode)}"
)
except SchedulingError:
self.err(
s,
"parallel loop's body is not parallelizable because of potential data races",
Expand Down
Loading
Loading