diff --git a/loopy/kernel/dependency.py b/loopy/kernel/dependency.py new file mode 100644 index 000000000..5512c62a6 --- /dev/null +++ b/loopy/kernel/dependency.py @@ -0,0 +1,222 @@ +from __future__ import annotations + + +__copyright__ = "Copyright (C) 2025 Addison Alvey-Blanco" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from typing import Mapping + +from constantdict import constantdict + +import islpy as isl +from islpy import dim_type + +from loopy import HappensAfter, LoopKernel, for_each_kernel +from loopy.kernel.instruction import ( + InstructionBase, + VariableSpecificHappensAfter, +) +from loopy.transform.dependency import AccessMapFinder + + +def _add_lexicographic_happens_after_inner(knl, after_insn, before_insn): + domain_before = knl.get_inames_domain(before_insn.within_inames) + domain_after = knl.get_inames_domain(after_insn.within_inames) + + happens_after = isl.Map.from_domain_and_range(domain_after, + domain_before) + for idim in range(happens_after.dim(dim_type.out)): + happens_after = happens_after.set_dim_name( + dim_type.out, + idim, + happens_after.get_dim_name(dim_type.out, idim) + "'" + ) + + shared_inames = before_insn.within_inames & after_insn.within_inames + + shared_inames_order_before = [ + domain_before.get_dim_name(dim_type.out, idim) + for idim in range(domain_before.dim(dim_type.out)) + if domain_before.get_dim_name(dim_type.out, idim) + in shared_inames + ] + + shared_inames_order_after = [ + domain_after.get_dim_name(dim_type.out, idim) + for idim in range(domain_after.dim(dim_type.out)) + if domain_after.get_dim_name(dim_type.out, idim) + in shared_inames + ] + + assert shared_inames_order_after == shared_inames_order_before + shared_inames_order = list(shared_inames_order_after) + + affs_in = isl.affs_from_space(happens_after.domain().space) + affs_out = isl.affs_from_space(happens_after.range().space) + + lex_map = isl.Map.empty(happens_after.space) + for iinnermost, innermost_iname in enumerate(shared_inames_order): + innermost_map = affs_in[innermost_iname].gt_map( + affs_out[innermost_iname + "'"] + ) + + for outer_iname in shared_inames_order[:iinnermost]: + innermost_map = innermost_map & ( + affs_in[outer_iname].eq_map( + affs_out[outer_iname + "'"] + ) + ) + + if before_insn != after_insn: + innermost_map = innermost_map | ( + affs_in[shared_inames_order[iinnermost]].eq_map( + affs_out[shared_inames_order[iinnermost] + "'"] + ) + ) + + lex_map = lex_map | innermost_map + + return happens_after & lex_map + + +@for_each_kernel +def add_lexicographic_happens_after(knl: LoopKernel) -> LoopKernel: + """ + Impose a sequential, top-down execution order to instructions in a program. + It is expected that this strict order will be relaxed with + :func:`reduce_strict_ordering_with_dependencies` using data dependencies. + """ + + rmap = knl.reader_map() + wmap_r: dict[str, set[str]] = {} + for var, insns in knl.writer_map().items(): + for insn in insns: + wmap_r.setdefault(insn, set()) + wmap_r[insn].add(var) + + new_insns = [] + for iafter, after_insn in enumerate(knl.instructions): + assert after_insn.id is not None + + new_happens_after = {} + + # check for self dependencies + for var in wmap_r[after_insn.id]: + if rmap.get(var) and after_insn.id in rmap[var]: + self_happens_after = _add_lexicographic_happens_after_inner( + knl, after_insn, after_insn + ) + new_happens_after[after_insn.id] = HappensAfter( + self_happens_after + ) + + if iafter != 0: + before_insn = knl.instructions[iafter - 1] + happens_after = _add_lexicographic_happens_after_inner( + knl, after_insn, before_insn + ) + new_happens_after[before_insn.id] = HappensAfter(happens_after) + + new_insns.append(after_insn.copy(happens_after=new_happens_after)) + + return knl.copy(instructions=new_insns) + + +@for_each_kernel +def reduce_strict_ordering(knl) -> LoopKernel: + def narrow_dependencies( + after: InstructionBase, + before: InstructionBase, + remaining_instances: isl.Set, # type: ignore + happens_afters: Mapping[str, VariableSpecificHappensAfter] = {}, + happens_after_map: isl.Map | None = None, # type: ignore + ) -> Mapping[str, VariableSpecificHappensAfter]: + # FIXME: can we get rid of all the "assert x is not None" stuff? + + assert isinstance(after.id, str) + assert isinstance(before.id, str) + + if remaining_instances.is_empty(): + return happens_afters + + for insn, happens_after in before.happens_after.items(): + if happens_after_map is None: + happens_after_map = happens_after.instances_rel + else: + assert happens_after.instances_rel is not None + happens_after_map = happens_after_map.apply_range( + happens_after.instances_rel) + + source_vars = access_mapper.get_accessed_variables(after.id) + common_vars = wmap_r[insn] & source_vars # type: ignore + for var in common_vars: + write_map = access_mapper.get_map(insn, var) + source_map = access_mapper.get_map(after.id, var) + + assert write_map is not None + assert source_map is not None + + source_to_writer = source_map.apply_range(write_map.reverse()) + dependency_map = source_to_writer & happens_after_map + remaining_instances = remaining_instances - dependency_map.domain() + if dependency_map is not None and not dependency_map.is_empty(): + happens_after_obj = VariableSpecificHappensAfter( + dependency_map, var + ) + + happens_afters = constantdict( + dict(happens_afters) | {insn: happens_after_obj}) + + if insn != after.id: + happens_afters = constantdict( + dict(happens_afters) | dict(narrow_dependencies( + after, + knl.id_to_insn[insn], + remaining_instances, + happens_afters, + happens_after_map, + )) + ) + + return happens_afters + + access_mapper = AccessMapFinder(knl) + for insn in knl.instructions: + access_mapper(insn.expression, insn.id) + access_mapper(insn.assignee, insn.id) + + wmap_r: dict[str, set[str]] = {} + for var, insns in knl.writer_map().items(): + for insn in insns: + wmap_r.setdefault(insn, set()) + wmap_r[insn].add(var) + + new_insns = [] + for insn in knl.instructions[::-1]: + new_insns.append( + insn.copy(happens_after=narrow_dependencies( + after=insn, + before=insn, + remaining_instances=knl.get_inames_domain(insn.within_inames))) + ) + + return knl.copy(instructions=new_insns[::-1]) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 604b581e3..ca84a6c18 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -99,36 +99,12 @@ class UseStreamingStoreTag(Tag): @dataclass(frozen=True) class HappensAfter: - """A class representing a "happens-after" relationship between two - statements found in a :class:`loopy.LoopKernel`. Used to validate that a - given kernel transformation respects the data dependencies in a given - program. + instances_rel: isl.Map | None # type: ignore - .. attribute:: variable_name - - The name of the variable responsible for the dependency. For - backward compatibility purposes, this may be *None*. In this case, the - dependency semantics revert to the deprecated, statement-level - dependencies of prior versions of :mod:`loopy`. - - .. attribute:: instances_rel - - An :class:`islpy.Map` representing the precise happens-after - relationship. The domain and range are sets of statement instances. The - instances in the domain are required to execute before the instances in - the range. - - Map dimensions are named according to the order of appearance of the - inames in a :mod:`loopy` program. The dimension names in the range are - appended with a prime to signify that the mapped instances are distinct. - - As a (deprecated) matter of backward compatibility, this may be *None*, - in which case the semantics revert to the (underspecified) - statement-level dependencies of prior versions of :mod:`loopy`. - """ +@dataclass(frozen=True) +class VariableSpecificHappensAfter(HappensAfter): variable_name: str | None - instances_rel: isl.Map | None # }}} @@ -335,14 +311,12 @@ def __init__(self, happens_after = constantdict({ after_id.strip(): HappensAfter( - variable_name=None, instances_rel=None) for after_id in happens_after.split(",") if after_id.strip()}) elif isinstance(happens_after, frozenset): happens_after = constantdict({ after_id: HappensAfter( - variable_name=None, instances_rel=None) for after_id in happens_after}) elif isinstance(happens_after, dict): diff --git a/loopy/transform/dependency.py b/loopy/transform/dependency.py new file mode 100644 index 000000000..39604f505 --- /dev/null +++ b/loopy/transform/dependency.py @@ -0,0 +1,125 @@ +from __future__ import annotations + + +""" +.. autoclass:: AccessMapFinder +""" +__copyright__ = "Copyright (C) 2022 Addison Alvey-Blanco" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +from typing import TYPE_CHECKING + +from pyrsistent import PMap, pmap + +import pymbolic.primitives as p + +from loopy.symbolic import ( + UnableToDetermineAccessRangeError, + WalkMapper, + get_access_map, +) + + +if TYPE_CHECKING: + import islpy as isl + + from loopy.kernel import LoopKernel + from loopy.typing import Expression + + +class AccessMapFinder(WalkMapper): + def __init__(self, knl: LoopKernel) -> None: + self.kernel = knl + self._access_maps: PMap[str, PMap[str, isl.Map]] = pmap({}) # type: ignore + from collections import defaultdict + + self.bad_subscripts: dict[str, list[Expression]] = defaultdict(list) + + super().__init__() + + def get_map(self, insn_id: str, variable_name: str) -> isl.Map | None: # type: ignore + """ + Retrieve an access map indexed by an instruction ID and variable + name. + """ + try: + return self._access_maps[insn_id][variable_name] + except KeyError: + return None + + def get_accessed_variables(self, insn_id: str) -> set[str] | None: + try: + return set(self._access_maps[insn_id].keys()) + except KeyError: + return None + + def map_subscript(self, expr, insn_id): + domain = self.kernel.get_inames_domain( + self.kernel.id_to_insn[insn_id].within_inames + ) + WalkMapper.map_subscript(self, expr, insn_id) + + assert isinstance(expr.aggregate, p.Variable) + + arg_name = expr.aggregate.name + subscript = expr.index_tuple + + try: + access_map = get_access_map(domain, subscript, self.kernel.assumptions) + except UnableToDetermineAccessRangeError: + # may not have enough info to generate access map at current point + self.bad_subscripts[arg_name].append(expr) + return + + # analyze what we have in our access map dict before storing map + insn_to_args = self._access_maps.get(insn_id) + if insn_to_args is not None: + existing_relation = insn_to_args.get(arg_name) + + if existing_relation is not None: + access_map |= existing_relation + + self._access_maps = self._access_maps.set( + insn_id, self._access_maps[insn_id].set(arg_name, access_map) + ) + + else: + self._access_maps = self._access_maps.set( + insn_id, pmap({arg_name: access_map}) + ) + + def map_linear_subscript(self, expr, insn_id): + raise NotImplementedError( + "linear subscripts cannot be used with " + "precise dependency finding. Use " + "multidimensional accesses to take advantage " + "of this feature." + ) + + def map_reduction(self, expr, insn_id): + return WalkMapper.map_reduction(self, expr, insn_id) + + def map_type_cast(self, expr, insn_id): + return self.rec(expr.child, insn_id) + + def map_sub_array_ref(self, expr, insn_id): + raise NotImplementedError("Not yet implemented") diff --git a/test/test_dependencies.py b/test/test_dependencies.py new file mode 100644 index 000000000..a9117515e --- /dev/null +++ b/test/test_dependencies.py @@ -0,0 +1,137 @@ +import sys + +import numpy as np +import pytest + +import pyopencl as cl +from pyopencl.tools import ( + pytest_generate_tests_for_pyopencl as pytest_generate_tests, # noqa +) + +import loopy as lp +from loopy.kernel.dependency import ( + add_lexicographic_happens_after, + reduce_strict_ordering, +) + + +def test_no_dependency(): + t_unit = lp.make_kernel( + "{ [i,j] : 0 <= i, j < n}", + """ + a[i,j] = 2*i {id=S} + b[i,j] = a[i+1,j+1] {id=T} + """, + ) + + t_unit = add_lexicographic_happens_after(t_unit) + t_unit = reduce_strict_ordering(t_unit) + knl = t_unit.default_entrypoint + + assert len(knl.id_to_insn["S"].happens_after) == 0 + assert len(knl.id_to_insn["T"].happens_after) == 0 + + +def test_odd_even_dependencies(): + t_unit = lp.make_kernel( + "{ [i] : 0 <= i < np }", + """ + u[2*i+1] = i {id=S} + u[2*i] = i {id=T} + u[i] = i {id=V} + """ + ) + + t_unit = add_lexicographic_happens_after(t_unit) + t_unit = reduce_strict_ordering(t_unit) + + knl = t_unit.default_entrypoint + assert "S" in knl.id_to_insn["V"].happens_after + assert "T" in knl.id_to_insn["V"].happens_after + for insn in knl.instructions: + print(f"{insn.id}:") + for insn_after, instances_rel in insn.happens_after.items(): + print(f" {insn_after}: {instances_rel}") + + +@pytest.mark.parametrize("img_size", [(512, 512), (1920, 1080)]) +def test_3x3_blur(ctx_factory, img_size): + ctx = ctx_factory() + queue = cl.CommandQueue(ctx) + + hx, hy = img_size + img = np.random.default_rng(seed=42).random(size=(hx, hy)) + + knl = lp.make_kernel( + "{ [x, y]: 0 <= x < hx and 0 <= y < hy }", + """ + img_(i, j) := img[i+1, j+1] + blurx(i, j) := img_(i-1, j) + img_(i, j) + img_(i+1, j) + + out[x, y] = blurx(x, y-1) + blurx(x, y) + blurx(x, y+1) + """, + [ + lp.GlobalArg("out", + dtype=np.float64, + shape=(hx, hy), + is_output=True), + lp.GlobalArg("img", + dtype=np.float64, + shape=(hx, hy)) + ] + ) + + knl = lp.fix_parameters(knl, hx=hx-2, hy=hy-2) + + knl = add_lexicographic_happens_after(knl) + knl = reduce_strict_ordering(knl) + + bsize = 4 + knl = lp.split_iname(knl, "x", bsize, inner_tag="vec", outer_tag="for") + knl = lp.split_iname(knl, "y", bsize, inner_tag="for", outer_tag="g.0") + knl = lp.precompute( + knl, + "blurx", + sweep_inames="x_inner, y_inner", + precompute_outer_inames="x_outer, y_outer", + precompute_inames="bx, by" + ) + + knl = lp.prioritize_loops(knl, "y_outer, x_outer, y_inner, x_inner") + knl = lp.expand_subst(knl) + + _, out = knl(queue, img=img) + blurx = np.zeros_like(img) + out_np = np.zeros_like(img) + for x in range(hx-2): + blurx[x, :] = img[x, :] + img[x+1, :] + img[x+2, :] + for y in range(hy-2): + out_np[:, y] = blurx[:, y] + blurx[:, y+1] + blurx[:, y+2] + + import numpy.linalg as la + assert (la.norm(out[0] - out_np) / la.norm(out_np)) <= 1e-14 + + +def test_self_dependence(): + t_unit = lp.make_kernel( + "[nt, nx] -> { [t, x]: 0 <= t < nt and 0 <= x < nx }", + """ + u[t+2,x+1] = 2*u[t+1,x+1] {id=self} + """ + ) + + t_unit = add_lexicographic_happens_after(t_unit) + t_unit = reduce_strict_ordering(t_unit) + + knl = t_unit.default_entrypoint + assert "self" in knl.instructions[0].happens_after.keys() + print(knl.id_to_insn["self"].happens_after["self"].instances_rel) + + +if __name__ == "__main__": + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + + main([__file__])